This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch 3.0.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/3.0.x by this push: new e2aaec4 KYLIN-4425 Refactor Diagnosis Module e2aaec4 is described below commit e2aaec4219d4cd7e37d87d5f0d002c791815c3cc Author: XiaoxiangYu <hit_la...@126.com> AuthorDate: Thu Mar 12 17:16:20 2020 +0800 KYLIN-4425 Refactor Diagnosis Module --- build/bin/diag.sh | 17 ++ .../org/apache/kylin/common/util/HadoopUtil.java | 1 - .../kylin/rest/service/DiagnosisService.java | 5 +- tool-assembly/pom.xml | 10 ++ tool/pom.xml | 6 + .../org/apache/kylin/tool/DiagnosisInfoCLI.java | 53 ++++-- .../org/apache/kylin/tool/JobDiagnosisInfoCLI.java | 112 +++++------- .../{ => extractor}/AbstractInfoExtractor.java | 51 +++++- .../tool/{ => extractor}/ClientEnvExtractor.java | 34 ++-- .../tool/{ => extractor}/CubeMetaExtractor.java | 69 +++++--- .../tool/{ => extractor}/HBaseUsageExtractor.java | 56 ++++-- .../{common => extractor}/HadoopConfExtractor.java | 18 +- .../kylin/tool/{ => extractor}/HiveConfigCLI.java | 2 +- .../kylin/tool/extractor/JStackExtractor.java | 59 +++++++ .../tool/{ => extractor}/JobInstanceExtractor.java | 4 +- .../tool/{ => extractor}/KylinLogExtractor.java | 10 +- .../tool/{ => extractor}/MrJobInfoExtractor.java | 14 +- .../tool/extractor/SparkEnvInfoExtractor.java | 91 ++++++++++ .../kylin/tool/extractor/YarnLogExtractor.java | 194 +++++++++++++++++++++ .../java/org/apache/kylin/tool/util/ToolUtil.java | 13 -- .../apache/kylin/tool/ClientEnvExtractorTest.java | 2 +- .../apache/kylin/tool/CubeMetaExtractorTest.java | 1 + 22 files changed, 644 insertions(+), 178 deletions(-) diff --git a/build/bin/diag.sh b/build/bin/diag.sh index bb1bdc4..6774a80 100755 --- a/build/bin/diag.sh +++ b/build/bin/diag.sh @@ -67,6 +67,23 @@ then fi exit 0 +elif [ $# -gt 2 ] # user can choose to use more flexibly options +then + source ${dir}/find-hive-dependency.sh + + if [ -f "${dir}/setenv-tool.sh" ] + then source ${dir}/setenv-tool.sh + fi + mkdir -p ${KYLIN_HOME}/ext + export HBASE_CLASSPATH_PREFIX=${KYLIN_HOME}/conf:${KYLIN_HOME}/tool/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH_PREFIX} + export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency} + + hbase ${KYLIN_EXTRA_START_OPTS} \ + -Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties \ + -Dcatalina.home=${tomcat_root} \ + "$@" + + exit 0 else echo "usage: diag.sh Project|JobId [target_path]" exit 1 diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java index 5187361..98c7b30 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HadoopUtil.java @@ -75,7 +75,6 @@ public class HadoopUtil { } // https://issues.apache.org/jira/browse/KYLIN-3064 conf.set("yarn.timeline-service.enabled", "false"); - return conf; } diff --git a/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java b/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java index 528858b..e046c3d 100644 --- a/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java +++ b/server-base/src/main/java/org/apache/kylin/rest/service/DiagnosisService.java @@ -98,9 +98,8 @@ public class DiagnosisService extends BasicService { Message msg = MsgPicker.getMsg(); File cwd = new File(""); - logger.debug("Current path: " + cwd.getAbsolutePath()); - - logger.debug("DiagnosisInfoCLI args: " + Arrays.toString(args)); + logger.debug("Current path: {}", 
cwd.getAbsolutePath()); + logger.debug("DiagnosisInfoCLI args: {}", Arrays.toString(args)); File script = new File(KylinConfig.getKylinHome() + File.separator + "bin", "diag.sh"); if (!script.exists()) { throw new BadRequestException( diff --git a/tool-assembly/pom.xml b/tool-assembly/pom.xml index 4d5626c..c87b405 100644 --- a/tool-assembly/pom.xml +++ b/tool-assembly/pom.xml @@ -63,6 +63,11 @@ <scope>compile</scope> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <scope>compile</scope> + </dependency> + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <scope>compile</scope> @@ -98,6 +103,7 @@ <include>log4j:log4j</include> <include>commons-io:commons-io</include> <include>commons-lang:commons-lang</include> + <include>org.apache.commons:commons-compress</include> <include>org.apache.commons:commons-lang3</include> <include>org.apache.commons:commons-email</include> <include>com.google.guava:guava</include> @@ -119,6 +125,10 @@ <shadedPattern>${shadeBase}.org.apache.commons.io</shadedPattern> </relocation> <relocation> + <pattern>org.apache.commons.compress</pattern> + <shadedPattern>${shadeBase}.org.apache.commons.compress</shadedPattern> + </relocation> + <relocation> <pattern>org.apache.commons.lang</pattern> <shadedPattern>${shadeBase}.org.apache.commons.lang</shadedPattern> </relocation> diff --git a/tool/pom.xml b/tool/pom.xml index 166893b..2f23229 100644 --- a/tool/pom.xml +++ b/tool/pom.xml @@ -63,6 +63,12 @@ <groupId>org.apache.kylin</groupId> <artifactId>kylin-server-base</artifactId> </dependency> + + <dependency> + <groupId>org.apache.kylin</groupId> + <artifactId>kylin-stream-core</artifactId> + </dependency> + <!--Env--> <dependency> <groupId>org.apache.hbase</groupId> diff --git a/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java b/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java index 9063e9e..12077c0 100644 --- a/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java +++ b/tool/src/main/java/org/apache/kylin/tool/DiagnosisInfoCLI.java @@ -37,6 +37,14 @@ import org.apache.kylin.common.util.OptionsHelper; import org.apache.kylin.metadata.project.ProjectInstance; import org.apache.kylin.metadata.project.ProjectManager; import org.apache.kylin.tool.util.ToolUtil; +import org.apache.kylin.stream.core.util.NamedThreadFactory; +import org.apache.kylin.tool.extractor.AbstractInfoExtractor; +import org.apache.kylin.tool.extractor.ClientEnvExtractor; +import org.apache.kylin.tool.extractor.CubeMetaExtractor; +import org.apache.kylin.tool.extractor.JStackExtractor; +import org.apache.kylin.tool.extractor.JobInstanceExtractor; +import org.apache.kylin.tool.extractor.KylinLogExtractor; +import org.apache.kylin.tool.extractor.SparkEnvInfoExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -142,7 +150,7 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { : DEFAULT_PERIOD; logger.info("Start diagnosis info extraction in {} threads.", threadsNum); - executorService = Executors.newFixedThreadPool(threadsNum); + executorService = Executors.newFixedThreadPool(threadsNum, new NamedThreadFactory("GeneralDiagnosis")); // export cube metadata executorService.execute(new Runnable() { @@ -150,9 +158,9 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { public void run() { logger.info("Start to extract metadata."); try { - String[] cubeMetaArgs = { "-packagetype", "cubemeta", "-destDir", + String[] cubeMetaArgs = 
{"-packagetype", "cubemeta", "-destDir", new File(exportDir, "metadata").getAbsolutePath(), "-project", projectNames, "-compress", - "false", "-includeJobs", "false", "-submodule", "true" }; + "false", "-includeJobs", "false", "-submodule", "true"}; CubeMetaExtractor cubeMetaExtractor = new CubeMetaExtractor(); logger.info("CubeMetaExtractor args: " + Arrays.toString(cubeMetaArgs)); cubeMetaExtractor.execute(cubeMetaArgs); @@ -169,8 +177,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { public void run() { logger.info("Start to extract jobs."); try { - String[] jobArgs = { "-destDir", new File(exportDir, "jobs").getAbsolutePath(), "-period", - Integer.toString(period), "-compress", "false", "-submodule", "true" }; + String[] jobArgs = {"-destDir", new File(exportDir, "jobs").getAbsolutePath(), "-period", + Integer.toString(period), "-compress", "false", "-submodule", "true"}; JobInstanceExtractor jobInstanceExtractor = new JobInstanceExtractor(); jobInstanceExtractor.execute(jobArgs); } catch (Exception e) { @@ -188,13 +196,13 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { logger.info("Start to extract HBase usage."); try { // use reflection to isolate NoClassDef errors when HBase is not available - String[] hbaseArgs = { "-destDir", new File(exportDir, "hbase").getAbsolutePath(), "-project", - projectNames, "-compress", "false", "-submodule", "true" }; + String[] hbaseArgs = {"-destDir", new File(exportDir, "hbase").getAbsolutePath(), "-project", + projectNames, "-compress", "false", "-submodule", "true"}; logger.info("HBaseUsageExtractor args: " + Arrays.toString(hbaseArgs)); - Object extractor = ClassUtil.newInstance("org.apache.kylin.tool.HBaseUsageExtractor"); + Object extractor = ClassUtil.newInstance("org.apache.kylin.tool.extractor.HBaseUsageExtractor"); Method execute = extractor.getClass().getMethod("execute", String[].class); execute.invoke(extractor, (Object) hbaseArgs); - } catch (Throwable e) { + } catch (Exception e) { logger.error("Error in export HBase usage.", e); } } @@ -232,8 +240,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { @Override public void run() { try { - String[] clientArgs = { "-destDir", new File(exportDir, "client").getAbsolutePath(), - "-compress", "false", "-submodule", "true" }; + String[] clientArgs = {"-destDir", new File(exportDir, "client").getAbsolutePath(), + "-compress", "false", "-submodule", "true"}; ClientEnvExtractor clientEnvExtractor = new ClientEnvExtractor(); logger.info("ClientEnvExtractor args: " + Arrays.toString(clientArgs)); clientEnvExtractor.execute(clientArgs); @@ -250,8 +258,8 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { public void run() { logger.info("Start to extract logs."); try { - String[] logsArgs = { "-destDir", new File(exportDir, "logs").getAbsolutePath(), "-logPeriod", - Integer.toString(period), "-compress", "false", "-submodule", "true" }; + String[] logsArgs = {"-destDir", new File(exportDir, "logs").getAbsolutePath(), "-logPeriod", + Integer.toString(period), "-compress", "false", "-submodule", "true"}; KylinLogExtractor logExtractor = new KylinLogExtractor(); logger.info("KylinLogExtractor args: " + Arrays.toString(logsArgs)); logExtractor.execute(logsArgs); @@ -261,8 +269,27 @@ public class DiagnosisInfoCLI extends AbstractInfoExtractor { } }); + // dump jstack + String[] jstackDumpArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule", + "true", "-submodule", "true"}; + logger.info("JStackExtractor args: {}", 
Arrays.toString(jstackDumpArgs)); + try { + new JStackExtractor().execute(jstackDumpArgs); + } catch (Exception e) { + logger.error("Error execute jstack dump extractor"); + } + + // export spark conf + String[] sparkEnvArgs = {"-destDir", new File(exportDir, "spark").getAbsolutePath(), "-compress", "false", "-submodule", "true"}; + try { + new SparkEnvInfoExtractor().execute(sparkEnvArgs); + } catch (Exception e) { + logger.error("Error execute spark extractor"); + } + executorService.shutdown(); try { + logger.info("Waiting for completed."); executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES); } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java b/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java index 8fec48e..cbb32bb 100644 --- a/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java +++ b/tool/src/main/java/org/apache/kylin/tool/JobDiagnosisInfoCLI.java @@ -19,10 +19,8 @@ package org.apache.kylin.tool; import java.io.File; -import java.io.IOException; import java.util.Arrays; import java.util.List; -import java.util.Map; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; @@ -32,16 +30,20 @@ import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.ResourceTool; import org.apache.kylin.common.util.OptionsHelper; -import org.apache.kylin.common.util.StringUtil; -import org.apache.kylin.job.constant.ExecutableConstants; import org.apache.kylin.job.dao.ExecutableDao; import org.apache.kylin.job.dao.ExecutablePO; import org.apache.kylin.tool.util.ToolUtil; +import org.apache.kylin.tool.extractor.AbstractInfoExtractor; +import org.apache.kylin.tool.extractor.ClientEnvExtractor; +import org.apache.kylin.tool.extractor.CubeMetaExtractor; +import org.apache.kylin.tool.extractor.JStackExtractor; +import org.apache.kylin.tool.extractor.KylinLogExtractor; +import org.apache.kylin.tool.extractor.SparkEnvInfoExtractor; +import org.apache.kylin.tool.extractor.YarnLogExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { private static final Logger logger = LoggerFactory.getLogger(JobDiagnosisInfoCLI.class); @@ -73,7 +75,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { .create("includeConf"); List<String> requiredResources = Lists.newArrayList(); - List<String> yarnLogsResources = Lists.newArrayList(); private KylinConfig kylinConfig; private ExecutableDao executableDao; @@ -121,9 +122,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { for (ExecutablePO kylinTask : executablePO.getTasks()) { addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + kylinTask.getUuid()); addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + kylinTask.getUuid()); - if (includeYarnLogs) { - yarnLogsResources.add(kylinTask.getUuid()); - } } extractResources(exportDir); @@ -133,30 +131,30 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { if (!StringUtils.isEmpty(cubeName)) { File metaDir = new File(exportDir, "cube"); FileUtils.forceMkdir(metaDir); - String[] cubeMetaArgs = { "-packagetype", "cubemeta", "-cube", cubeName, "-destDir", + String[] cubeMetaArgs = {"-packagetype", "cubemeta", "-cube", cubeName, "-destDir", new File(metaDir, 
cubeName).getAbsolutePath(), "-includeJobs", "false", "-compress", "false", - "-submodule", "true" }; - logger.info("Start to extract related cube: " + StringUtils.join(cubeMetaArgs)); + "-submodule", "true"}; + logger.info("Start to extract related cube: {}", StringUtils.join(cubeMetaArgs)); CubeMetaExtractor cubeMetaExtractor = new CubeMetaExtractor(); - logger.info("CubeMetaExtractor args: " + Arrays.toString(cubeMetaArgs)); + logger.info("CubeMetaExtractor args: {}", Arrays.toString(cubeMetaArgs)); cubeMetaExtractor.execute(cubeMetaArgs); } } // dump mr job info if (includeYarnLogs) { - logger.info("Start to dump mr job info: " + kylinJobId); + YarnLogExtractor yarnLogExtractor = new YarnLogExtractor(); + logger.info("Start to dump mr job info: {}", kylinJobId); File yarnDir = new File(exportDir, "yarn"); FileUtils.forceMkdir(yarnDir); - for (String stepId : yarnLogsResources) { - extractJobInfo(stepId, new File(yarnDir, stepId)); - extractJobLog(stepId, new File(yarnDir, stepId), true); - } + yarnLogExtractor.extractYarnLogAndMRJob(kylinJobId, new File(yarnDir, kylinJobId)); } + // host info if (includeClient) { - String[] clientArgs = { "-destDir", new File(exportDir, "client").getAbsolutePath(), "-compress", "false", - "-submodule", "true" }; + logger.info("Start to extract client info."); + String[] clientArgs = {"-destDir", new File(exportDir, "client").getAbsolutePath(), "-compress", "false", + "-submodule", "true"}; ClientEnvExtractor clientEnvExtractor = new ClientEnvExtractor(); logger.info("ClientEnvExtractor args: " + Arrays.toString(clientArgs)); clientEnvExtractor.execute(clientArgs); @@ -172,9 +170,28 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { } } + // dump jstack + String[] jstackDumpArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule", + "true"}; + logger.info("JStackExtractor args: {}", Arrays.toString(jstackDumpArgs)); + try { + new JStackExtractor().execute(jstackDumpArgs); + } catch (Exception e) { + logger.error("Error execute jstack dump extractor"); + } + + // export spark conf + String[] sparkEnvArgs = {"-destDir", exportDir.getAbsolutePath(), "-compress", "false", "-submodule", + "true"}; + try { + new SparkEnvInfoExtractor().execute(sparkEnvArgs); + } catch (Exception e) { + logger.error("Error execute spark extractor"); + } + // export kylin logs - String[] logsArgs = { "-destDir", new File(exportDir, "logs").getAbsolutePath(), "-compress", "false", - "-submodule", "true" }; + String[] logsArgs = {"-destDir", new File(exportDir, "logs").getAbsolutePath(), "-compress", "false", + "-submodule", "true"}; KylinLogExtractor logExtractor = new KylinLogExtractor(); logger.info("KylinLogExtractor args: " + Arrays.toString(logsArgs)); logExtractor.execute(logsArgs); @@ -195,57 +212,6 @@ public class JobDiagnosisInfoCLI extends AbstractInfoExtractor { } } - private void extractJobLog(String taskId, File destDir, boolean onlyFail) throws Exception { - final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo(); - FileUtils.forceMkdir(destDir); - if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) { - String applicationId = jobInfo.get(ExecutableConstants.MR_JOB_ID).replace("job", "application"); - if (!onlyFail || !isYarnAppSucc(applicationId)) { - File destFile = new File(destDir, applicationId + ".log"); - String yarnCmd = "yarn logs -applicationId " + applicationId + " > " + destFile.getAbsolutePath(); - logger.debug(yarnCmd); - try { - 
kylinConfig.getCliCommandExecutor().execute(yarnCmd); - } catch (Exception ex) { - logger.warn("Failed to get yarn logs. ", ex); - } - } - } - } - - private void extractJobInfo(String taskId, File destDir) throws Exception { - final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo(); - if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) { - String mrJobId = jobInfo.get(ExecutableConstants.MR_JOB_ID); - FileUtils.forceMkdir(destDir); - String[] mrJobArgs = { "-mrJobId", mrJobId, "-destDir", destDir.getAbsolutePath(), "-compress", "false", - "-submodule", "true" }; - new MrJobInfoExtractor().execute(mrJobArgs); - } - } - - private boolean isYarnAppSucc(String applicationId) throws IOException { - final String yarnCmd = "yarn application -status " + applicationId; - final String cmdOutput = kylinConfig.getCliCommandExecutor().execute(yarnCmd).getSecond(); - final Map<String, String> params = Maps.newHashMap(); - final String[] cmdOutputLines = StringUtil.split(cmdOutput, "\n"); - for (String cmdOutputLine : cmdOutputLines) { - String[] pair = StringUtil.split(cmdOutputLine, ":"); - if (pair.length >= 2) { - params.put(pair[0].trim(), pair[1].trim()); - } - } - for (Map.Entry<String, String> e : params.entrySet()) { - logger.info(e.getKey() + ":" + e.getValue()); - } - - if (params.containsKey("State") && params.get("State").equals("RUNNING")) { - return true; - } - - return params.containsKey("Final-State") && params.get("Final-State").equals("SUCCEEDED"); - } - private void addRequired(String record) { logger.info("adding required resource {}", record); requiredResources.add(record); diff --git a/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java similarity index 81% rename from tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java index 994e4d6..965912e 100644 --- a/tool/src/main/java/org/apache/kylin/tool/AbstractInfoExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/AbstractInfoExtractor.java @@ -17,7 +17,7 @@ * */ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; @@ -34,6 +34,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.KylinVersion; import org.apache.kylin.common.util.AbstractApplication; +import org.apache.kylin.common.util.CliCommandExecutor; import org.apache.kylin.common.util.OptionsHelper; import org.apache.kylin.common.util.ZipFileUtils; import org.apache.kylin.tool.util.ToolUtil; @@ -60,7 +61,8 @@ public abstract class AbstractInfoExtractor extends AbstractApplication { .withDescription("specify the package type").create("packagetype"); private static final String DEFAULT_PACKAGE_TYPE = "base"; - private static final String[] COMMIT_SHA1_FILES = { "commit_SHA1", "commit.sha1" }; + private static final String[] COMMIT_SHA1_FILES = {"commit_SHA1", "commit.sha1"}; + protected CliCommandExecutor cmdExecutor; protected final Options options; @@ -74,6 +76,8 @@ public abstract class AbstractInfoExtractor extends AbstractApplication { options.addOption(OPTION_SUBMODULE); options.addOption(OPTION_PACKAGETYPE); packageType = DEFAULT_PACKAGE_TYPE; + + cmdExecutor = KylinConfig.getInstanceFromEnv().getCliCommandExecutor(); } @Override @@ -179,4 +183,47 @@ public abstract class AbstractInfoExtractor 
extends AbstractApplication { public String getExportDest() { return exportDir.getAbsolutePath(); } + + public static String getKylinPid() { + File pidFile = new File(getKylinHome(), "pid"); + if (pidFile.exists()) { + try { + return FileUtils.readFileToString(pidFile); + } catch (IOException e) { + throw new RuntimeException("Error reading KYLIN PID file.", e); + } + } else { + throw new RuntimeException("Cannot find KYLIN PID file."); + } + } + + public static String getKylinHome() { + String path = System.getProperty(KylinConfig.KYLIN_CONF); + if (StringUtils.isNotEmpty(path)) { + return path; + } + path = KylinConfig.getKylinHome(); + if (StringUtils.isNotEmpty(path)) { + return path; + } + throw new RuntimeException("Cannot find KYLIN_HOME."); + } + + public void addFile(File srcFile, File destDir) { + logger.info("copy file " + srcFile.getName()); + try { + FileUtils.forceMkdir(destDir); + } catch (IOException e) { + logger.error("Can not create" + destDir, e); + } + + File destFile = new File(destDir, srcFile.getName()); + String copyCmd = String.format(Locale.ROOT, "cp -rL %s %s", srcFile.getAbsolutePath(), destFile.getAbsolutePath()); + logger.info("The command is: " + copyCmd); + try { + cmdExecutor.execute(copyCmd); + } catch (Exception e) { + logger.debug("Failed to execute copyCmd", e); + } + } } diff --git a/tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java similarity index 82% rename from tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java index 80741ea..4c8597b 100644 --- a/tool/src/main/java/org/apache/kylin/tool/ClientEnvExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/ClientEnvExtractor.java @@ -17,7 +17,7 @@ * */ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; @@ -35,21 +35,25 @@ import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.CliCommandExecutor; import org.apache.kylin.common.util.OptionsHelper; import org.apache.kylin.common.util.Pair; +import org.apache.kylin.stream.core.util.NamedThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.io.Files; +/** + * Host information collection + */ public class ClientEnvExtractor extends AbstractInfoExtractor { private static final Logger logger = LoggerFactory.getLogger(ClientEnvExtractor.class); private KylinConfig kylinConfig; private CliCommandExecutor cmdExecutor; private ExecutorService executorService; - int maxWaitSeconds = 120; + public int maxWaitSeconds = 150; - public ClientEnvExtractor() throws IOException { + public ClientEnvExtractor() { super(); - executorService = Executors.newFixedThreadPool(1); + executorService = Executors.newFixedThreadPool(3, new NamedThreadFactory("ClientEnv")); packageType = "client"; kylinConfig = KylinConfig.getInstanceFromEnv(); cmdExecutor = kylinConfig.getCliCommandExecutor(); @@ -57,6 +61,7 @@ public class ClientEnvExtractor extends AbstractInfoExtractor { @Override protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception { + // dump os info addLocalFile("/sys/kernel/mm/transparent_hugepage/defrag", "linux/transparent_hugepage"); addLocalFile("/proc/sys/vm/swappiness", "linux/swappiness"); @@ -71,8 +76,11 @@ public class ClientEnvExtractor extends AbstractInfoExtractor { addShellOutput("hadoop 
version", "hadoop", "version"); addShellOutput("hbase version", "hbase", "version"); addShellOutput("hive --version", "hive", "version"); + addShellOutput("klist", "kerberos", "klist"); addShellOutput("beeline --version", "hive", "beeline_version"); - executorService.shutdownNow(); + + logger.info("ClientEnvExtractor is shutting downing. "); + executorService.awaitTermination(maxWaitSeconds, TimeUnit.SECONDS); } private void addLocalFile(String src, String destDir) { @@ -92,7 +100,7 @@ public class ClientEnvExtractor extends AbstractInfoExtractor { } } - void addShellOutput(String cmd, String destDir, String filename) { + public void addShellOutput(String cmd, String destDir, String filename) { Future f = executorService.submit(() -> { try { File destDirFile = null; @@ -102,8 +110,10 @@ public class ClientEnvExtractor extends AbstractInfoExtractor { } else { destDirFile = exportDir; } + logger.debug("Will execute {}", cmd); Pair<Integer, String> result = cmdExecutor.execute(cmd); String output = result.getSecond(); + logger.debug("Execute command {} return {}", cmd, result.getFirst()); FileUtils.writeStringToFile(new File(destDirFile, filename), output, Charset.defaultCharset()); } catch (IOException e) { logger.warn("Failed to run command: " + cmd + ".", e); @@ -113,14 +123,18 @@ public class ClientEnvExtractor extends AbstractInfoExtractor { try { // assume most shell should return in two minutes f.get(maxWaitSeconds, TimeUnit.SECONDS); - } catch (TimeoutException timeoutException) { + } catch (TimeoutException | InterruptedException timeoutException) { logger.error("Timeout for \"{}\" in {} seconds.", cmd, maxWaitSeconds); + f.cancel(true); executorService.shutdownNow(); - executorService = Executors.newFixedThreadPool(1); } catch (ExecutionException runtimeException) { logger.error("Runtime error: {}", runtimeException.getLocalizedMessage()); - } catch (InterruptedException otherException) { - // Ignore + executorService.shutdownNow(); } } + + public static void main(String[] args) { + ClientEnvExtractor extractor = new ClientEnvExtractor(); + extractor.execute(args); + } } diff --git a/tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java similarity index 92% rename from tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java index 6b661ca..0789011 100644 --- a/tool/src/main/java/org/apache/kylin/tool/CubeMetaExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/CubeMetaExtractor.java @@ -16,11 +16,13 @@ * limitations under the License. 
*/ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; +import java.util.Collection; import java.util.List; +import java.util.Objects; import java.util.Set; import org.apache.commons.cli.Option; @@ -58,6 +60,8 @@ import org.apache.kylin.metadata.streaming.StreamingManager; import org.apache.kylin.source.kafka.config.KafkaConfig; import org.apache.kylin.storage.hybrid.HybridInstance; import org.apache.kylin.storage.hybrid.HybridManager; +import org.apache.kylin.stream.core.source.StreamingSourceConfig; +import org.apache.kylin.stream.core.source.StreamingSourceConfigManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,7 +89,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { .withDescription("Specify which hybrid to extract").create("hybrid"); @SuppressWarnings("static-access") private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false) - .withDescription("Specify realizations in which project to extract").create("project"); + .withDescription("Specify realizations in which project to extract, project names can be connected by comma.").create("project"); @SuppressWarnings("static-access") private static final Option OPTION_All_PROJECT = OptionBuilder.withArgName("allProjects").hasArg(false) .isRequired(false).withDescription("Specify realizations in all projects to extract").create("allProjects"); @@ -112,14 +116,14 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { @SuppressWarnings("static-access") private static final Option OPTION_INCLUDE_ONLY_JOB_OUTPUT = OptionBuilder.withArgName("onlyOutput").hasArg() - .isRequired(false).withDescription("when include jobs, onlt extract output of job. Default true") + .isRequired(false).withDescription("when include jobs, only extract output of job. Default true") .create("onlyOutput"); @SuppressWarnings("static-access") private static final Option OPTION_INCLUDE_SEGMENT_DETAILS = OptionBuilder.withArgName("includeSegmentDetails") .hasArg().isRequired(false) .withDescription( - "set this to true if want to extract segment details too, such as dict, tablesnapshot. Default false") + "set this to true if want to extract segment details too, such as dict, table snapshot. 
Default false") .create("includeSegmentDetails"); private KylinConfig kylinConfig; @@ -128,6 +132,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { private HybridManager hybridManager; private CubeManager cubeManager; private StreamingManager streamingManager; + private StreamingSourceConfigManager streamingSourceConfigManager; private CubeDescManager cubeDescManager; private ExecutableDao executableDao; private RealizationRegistry realizationRegistry; @@ -193,6 +198,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { executableDao = ExecutableDao.getInstance(kylinConfig); realizationRegistry = RealizationRegistry.getInstance(kylinConfig); badQueryHistoryManager = BadQueryHistoryManager.getInstance(kylinConfig); + streamingSourceConfigManager = StreamingSourceConfigManager.getInstance(kylinConfig); addRequired(ResourceStore.METASTORE_UUID_TAG); @@ -254,10 +260,10 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { private void executeExtraction(String dest) { logger.info("The resource paths going to be extracted:"); for (String s : requiredResources) { - logger.info(s + "(required)"); + logger.info("{} is required resources.", s); } for (String s : optionalResources) { - logger.info(s + "(optional)"); + logger.info("{} is optional resources.", s); } for (CubeInstance cube : cubesToTrimAndSave) { logger.info("Cube {} will be trimmed and extracted", cube); @@ -294,7 +300,7 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { private void engineOverwrite(File dest) throws IOException { if (engineType != null || storageType != null) { - for (File f : dest.listFiles()) { + for (File f : Objects.requireNonNull(dest.listFiles())) { if (f.isDirectory()) { engineOverwrite(f); } else { @@ -330,17 +336,6 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { return realizationRegistry.getRealization(realizationEntry.getType(), realizationEntry.getRealization()); } - private void addStreamingConfig(CubeInstance cube) { - streamingManager = StreamingManager.getInstance(kylinConfig); - for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) { - if (streamingConfig.getName() != null - && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) { - addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName())); - addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName())); - } - } - } - private void retrieveResourcePath(IRealization realization) throws IOException { if (realization == null) { return; @@ -354,6 +349,8 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { addTables(modelDesc); // add streaming stuff addStreamingConfig(cube); + // add streamingV2 + addStreamingV2Config(cube); // add cube addRequired(CubeDesc.concatResourcePath(cubeDesc.getName())); // add project @@ -376,14 +373,14 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { } } - private void addTables(DataModelDesc modelDesc) throws IOException { + private void addTables(DataModelDesc modelDesc) { if (modelDesc != null) { //fixme should get all tbls in prj not only in cubes when back up by prj. 
for (TableRef tableRef : modelDesc.getAllTables()) { addRequired(tableRef.getTableDesc().getResourcePath()); - addOptional(TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv()) // - .getTableExt(tableRef.getTableDesc()) // - .getResourcePath()); // + addOptional(TableMetadataManager.getInstance(KylinConfig.getInstanceFromEnv()) + .getTableExt(tableRef.getTableDesc()) + .getResourcePath()); } addRequired(DataModelDesc.concatResourcePath(modelDesc.getName())); } @@ -410,7 +407,6 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { } else { try { if (onlyJobOutput) { - ExecutablePO executablePO = executableDao.getJob(lastJobId); addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId); } else { ExecutablePO executablePO = executableDao.getJob(lastJobId); @@ -436,6 +432,33 @@ public class CubeMetaExtractor extends AbstractInfoExtractor { } } + private void addStreamingConfig(CubeInstance cube) { + streamingManager = StreamingManager.getInstance(kylinConfig); + for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) { + if (streamingConfig.getName() != null + && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) { + addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName())); + addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName())); + } + } + } + + private void addStreamingV2Config(CubeInstance cube) { + Collection<StreamingSourceConfig> streamingConfigs; + try { + streamingConfigs = streamingSourceConfigManager.listAllStreaming(); + } catch (IOException ioe) { + logger.error("", ioe); + return; + } + for (StreamingSourceConfig streamingConfig : streamingConfigs) { + if (streamingConfig.getName() != null + && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) { + addRequired(StreamingSourceConfig.concatResourcePath(streamingConfig.getName())); + } + } + } + private void addRequired(String record) { logger.info("adding required resource {}", record); requiredResources.add(record); diff --git a/tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java similarity index 80% rename from tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java index 8ffa473..e545ba6 100644 --- a/tool/src/main/java/org/apache/kylin/tool/HBaseUsageExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HBaseUsageExtractor.java @@ -16,13 +16,14 @@ * limitations under the License. 
*/ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; import java.net.URL; import java.nio.charset.Charset; import java.util.List; +import java.util.Locale; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; @@ -58,6 +59,10 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { @SuppressWarnings("static-access") private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false).withDescription("Specify realizations in which project to extract").create("project"); + public static final String HDFS_CHECK_COMMAND = "hadoop fs -ls -R %s/data/%s/%s*"; + private final String hbaseRootDir; + private final String cachedHMasterUrl; + private List<String> htables = Lists.newArrayList(); private Configuration conf; private CubeManager cubeManager; @@ -76,6 +81,8 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { options.addOptionGroup(realizationOrProject); conf = HBaseConfiguration.create(); + hbaseRootDir = conf.get("hbase.rootdir"); + cachedHMasterUrl = getHBaseMasterUrl(); } public static void main(String[] args) { @@ -83,18 +90,24 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { extractor.execute(args); } - private String getHBaseMasterUrl() throws IOException, KeeperException { + private String getHBaseMasterUrl() { String host = conf.get("hbase.master.info.bindAddress"); if (host.equals("0.0.0.0")) { - host = MasterAddressTracker.getMasterAddress(new ZooKeeperWatcher(conf, null, null)).getHostname(); + try { + host = MasterAddressTracker.getMasterAddress(new ZooKeeperWatcher(conf, null, null)).getHostname(); + } catch (IOException | KeeperException io) { + return null; + } } - String port = conf.get("hbase.master.info.port"); return "http://" + host + ":" + port + "/"; } @Override protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception { + if (cachedHMasterUrl == null) { + return; + } kylinConfig = KylinConfig.getInstanceFromEnv(); cubeManager = CubeManager.getInstance(kylinConfig); realizationRegistry = RealizationRegistry.getInstance(kylinConfig); @@ -124,14 +137,14 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { } } - extractCommonInfo(exportDir); + extractCommonInfo(exportDir, kylinConfig); extractHTables(exportDir); } private void extractHTables(File dest) throws IOException { logger.info("These htables are going to be extracted:"); for (String htable : htables) { - logger.info(htable + "(required)"); + logger.info("{} is required", htable); } File tableDir = new File(dest, "table"); @@ -139,7 +152,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { for (String htable : htables) { try { - URL srcUrl = new URL(getHBaseMasterUrl() + "table.jsp?name=" + htable); + URL srcUrl = new URL(cachedHMasterUrl + "table.jsp?name=" + htable); File destFile = new File(tableDir, htable + ".html"); FileUtils.copyURLToFile(srcUrl, destFile); } catch (Exception e) { @@ -148,14 +161,16 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { } } - private void extractCommonInfo(File dest) throws IOException { + private void extractCommonInfo(File dest, KylinConfig config) throws IOException { logger.info("The hbase master info/conf are going to be extracted..."); + String hbaseNamespace = config.getHBaseStorageNameSpace(); + String tableNamePrefix = config.getHBaseTableNamePrefix(); // hbase master page try { File masterDir = new 
File(dest, "master"); FileUtils.forceMkdir(masterDir); - URL srcMasterUrl = new URL(getHBaseMasterUrl() + "master-status"); + URL srcMasterUrl = new URL(cachedHMasterUrl + "master-status"); File masterDestFile = new File(masterDir, "master-status.html"); FileUtils.copyURLToFile(srcMasterUrl, masterDestFile); } catch (Exception e) { @@ -166,7 +181,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { try { File confDir = new File(dest, "conf"); FileUtils.forceMkdir(confDir); - URL srcConfUrl = new URL(getHBaseMasterUrl() + "conf"); + URL srcConfUrl = new URL(cachedHMasterUrl + "conf"); File destConfFile = new File(confDir, "hbase-conf.xml"); FileUtils.copyURLToFile(srcConfUrl, destConfFile); } catch (Exception e) { @@ -177,22 +192,33 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { try { File jmxDir = new File(dest, "jmx"); FileUtils.forceMkdir(jmxDir); - URL srcJmxUrl = new URL(getHBaseMasterUrl() + "jmx"); + URL srcJmxUrl = new URL(cachedHMasterUrl + "jmx"); File jmxDestFile = new File(jmxDir, "jmx.html"); FileUtils.copyURLToFile(srcJmxUrl, jmxDestFile); } catch (Exception e) { logger.warn("HBase JMX fetch failed: ", e); } + // dump page + try { + File dumpDir = new File(dest, "dump"); + FileUtils.forceMkdir(dumpDir); + URL srcDumpUrl = new URL(cachedHMasterUrl + "dump"); + File dumpDestFile = new File(dumpDir, "dump"); + FileUtils.copyURLToFile(srcDumpUrl, dumpDestFile); + } catch (Exception e) { + logger.warn("HBase Dump fetch failed: ", e); + } + // hbase hdfs status try { File hdfsDir = new File(dest, "hdfs"); FileUtils.forceMkdir(hdfsDir); CliCommandExecutor cliCommandExecutor = kylinConfig.getCliCommandExecutor(); - String output = cliCommandExecutor.execute("hadoop fs -ls -R " + conf.get("hbase.rootdir") + "/data/default/KYLIN_*").getSecond(); + String command = String.format(Locale.ROOT, HDFS_CHECK_COMMAND, hbaseRootDir, hbaseNamespace, tableNamePrefix); + logger.info("Execute command {}", command); + String output = cliCommandExecutor.execute(command).getSecond(); FileUtils.writeStringToFile(new File(hdfsDir, "hdfs-files.list"), output, Charset.defaultCharset()); - output = cliCommandExecutor.execute("hadoop fs -ls -R " + conf.get("hbase.rootdir") + "/data/default/kylin_*").getSecond(); - FileUtils.writeStringToFile(new File(hdfsDir, "hdfs-files.list"), output, Charset.defaultCharset(), true); } catch (Exception e) { logger.warn("HBase hdfs status fetch failed: ", e); } @@ -212,7 +238,7 @@ public class HBaseUsageExtractor extends AbstractInfoExtractor { addHTable(segment.getStorageLocationIdentifier()); } } else { - logger.warn("Unknown realization type: " + realization.getType()); + logger.warn("Unknown realization type: {}", realization.getType()); } } diff --git a/tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java similarity index 82% rename from tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java index d8c8e2e..532478b 100644 --- a/tool/src/main/java/org/apache/kylin/tool/common/HadoopConfExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HadoopConfExtractor.java @@ -17,7 +17,7 @@ * */ -package org.apache.kylin.tool.common; +package org.apache.kylin.tool.extractor; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -35,20 +35,22 @@ import com.google.common.base.Preconditions; public class 
HadoopConfExtractor { private static final Logger logger = LoggerFactory.getLogger(HadoopConfExtractor.class); + public static final String MR_JOB_HISTORY_URL_CONF_KEY = "mapreduce.jobhistory.webapp.address"; public static String extractYarnMasterUrl(Configuration conf) { KylinConfig config = KylinConfig.getInstanceFromEnv(); final String yarnStatusCheckUrl = config.getYarnStatusCheckUrl(); - Pattern pattern = Pattern.compile("(http(s)?://)([^:]*):([^/])*.*"); + Pattern pattern = Pattern.compile("(http[s]*://)([^:]*):([^/])*.*"); if (yarnStatusCheckUrl != null) { + logger.info("Choose user-defined configuration for RM url {}. ", yarnStatusCheckUrl); Matcher m = pattern.matcher(yarnStatusCheckUrl); if (m.matches()) { return m.group(1) + m.group(2) + ":" + m.group(3); } + } else { + logger.info("kylin.engine.mr.yarn-check-status-url" + " is not set, read from hadoop configuration"); } - logger.info("kylin.engine.mr.yarn-check-status-url" + " is not set, read from hadoop configuration"); - String webappConfKey, defaultAddr; if (YarnConfiguration.useHttps(conf)) { webappConfKey = YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS; @@ -76,15 +78,15 @@ public class HadoopConfExtractor { } Matcher m = pattern.matcher(rmWebHost); Preconditions.checkArgument(m.matches(), "Yarn master URL not found."); - logger.info("yarn master url: " + rmWebHost); + logger.info("yarn master url: {}", rmWebHost); return rmWebHost; } public static String extractJobHistoryUrl(String yarnWebapp, Configuration conf) { - Pattern pattern = Pattern.compile("(http(s)?://)([^:]*):([^/])*.*"); + Pattern pattern = Pattern.compile("(http[s]*://)([^:]*):([^/])*.*"); Matcher m = pattern.matcher(yarnWebapp); Preconditions.checkArgument(m.matches(), "Yarn master URL" + yarnWebapp + " not right."); - return m.group(1) - + HAUtil.getConfValueForRMInstance("mapreduce.jobhistory.webapp.address", m.group(2) + ":19888", conf); + String defaultHistoryUrl = m.group(2) + ":19888"; + return m.group(1) + HAUtil.getConfValueForRMInstance(MR_JOB_HISTORY_URL_CONF_KEY, defaultHistoryUrl, conf); } } diff --git a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java b/tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java similarity index 98% rename from tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java index e5b300a..b56f17c 100644 --- a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/HiveConfigCLI.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java new file mode 100644 index 0000000..7bf5a34 --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/JStackExtractor.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.kylin.tool.extractor; + +import org.apache.commons.io.FileUtils; +import org.apache.kylin.common.util.CliCommandExecutor; +import org.apache.kylin.common.util.OptionsHelper; +import org.apache.kylin.common.util.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.Locale; + +/** + * https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jstack.html + */ +public class JStackExtractor extends AbstractInfoExtractor { + private static final Logger logger = LoggerFactory.getLogger(JStackExtractor.class); + + public JStackExtractor() { + super(); + packageType = "jstack"; + } + + protected void executeExtract(OptionsHelper optionsHelper, File exportDir) { + try { + File logDir = new File(exportDir, "logs"); + File jstackDumpFile = new File(logDir, String.format(Locale.ROOT, "jstack.log.%s", System.currentTimeMillis())); + dumpKylinJStack(jstackDumpFile); + } catch (IOException e) { + logger.error("IO Error on dump jstack", e); + } + } + + private static void dumpKylinJStack(File outputFile) throws IOException { + String jstackDumpCmd = String.format(Locale.ROOT, "jstack -l %s", getKylinPid()); + Pair<Integer, String> result = new CliCommandExecutor().execute(jstackDumpCmd, null); + FileUtils.writeStringToFile(outputFile, result.getSecond()); + } +} diff --git a/tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java similarity index 98% rename from tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java index bd03aaa..7ebc3e0 100644 --- a/tool/src/main/java/org/apache/kylin/tool/JobInstanceExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/JobInstanceExtractor.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.util.List; @@ -84,7 +84,7 @@ public class JobInstanceExtractor extends AbstractInfoExtractor { String cube = optionsHelper.hasOption(OPTION_CUBE) ? optionsHelper.getOptionValue(OPTION_CUBE) : null; String project = optionsHelper.hasOption(OPTION_PROJECT) ? optionsHelper.getOptionValue(OPTION_PROJECT) : null; int period = optionsHelper.hasOption(OPTION_PERIOD) ? 
Integer.parseInt(optionsHelper.getOptionValue(OPTION_PERIOD)) : DEFAULT_PERIOD; - + // maybe use start time and end time to instead of period is better long endTime = System.currentTimeMillis(); long startTime = endTime - period * 24 * 3600 * 1000; // time in Millis List<JobInstance> jobInstances = listJobInstances(project, cube, startTime, endTime); diff --git a/tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java similarity index 91% rename from tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java index a84345b..4ec10e6 100644 --- a/tool/src/main/java/org/apache/kylin/tool/KylinLogExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/KylinLogExtractor.java @@ -17,7 +17,7 @@ * */ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.util.ArrayList; @@ -32,7 +32,6 @@ import org.apache.kylin.cube.CubeDescManager; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.metadata.model.DataModelManager; import org.apache.kylin.metadata.project.ProjectManager; -import org.apache.kylin.tool.util.ToolUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,11 +82,8 @@ public class KylinLogExtractor extends AbstractInfoExtractor { logger.info("Start to extract kylin logs in {} days", logPeriod); List<File> logDirs = Lists.newArrayList(); - logDirs.add(new File(KylinConfig.getKylinHome(), "logs")); - String kylinVersion = ToolUtil.decideKylinMajorVersionFromCommitFile(); - if (kylinVersion != null && kylinVersion.equals("1.3")) { - logDirs.add(new File(KylinConfig.getKylinHome(), "tomcat/logs")); - } + logDirs.add(new File(getKylinHome(), "logs")); + logDirs.add(new File(getKylinHome(), "tomcat/logs")); final ArrayList<File> requiredLogFiles = Lists.newArrayList(); final long logThresholdTime = System.currentTimeMillis() - logPeriod * 24 * 3600 * 1000; diff --git a/tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java similarity index 97% rename from tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java rename to tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java index 31c0a5c..a4b516d 100644 --- a/tool/src/main/java/org/apache/kylin/tool/MrJobInfoExtractor.java +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/MrJobInfoExtractor.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.kylin.tool; +package org.apache.kylin.tool.extractor; import java.io.File; import java.io.IOException; @@ -35,7 +35,6 @@ import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.util.EntityUtils; import org.apache.kylin.common.util.HadoopUtil; import org.apache.kylin.common.util.OptionsHelper; -import org.apache.kylin.tool.common.HadoopConfExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +44,9 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +/** + * http://hadoop.apache.org/docs/r2.7.3/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html + */ public class MrJobInfoExtractor extends AbstractInfoExtractor { private static final Logger logger = LoggerFactory.getLogger(MrJobInfoExtractor.class); @@ -83,9 +85,9 @@ public class MrJobInfoExtractor extends AbstractInfoExtractor { private String getHttpResponse(String url) { DefaultHttpClient client = new DefaultHttpClient(); String msg = null; - int retry_times = 0; - while (msg == null && retry_times < HTTP_RETRY) { - retry_times++; + int retryTimes = 0; + while (msg == null && retryTimes < HTTP_RETRY) { + retryTimes++; HttpGet request = new HttpGet(url); try { @@ -93,7 +95,7 @@ public class MrJobInfoExtractor extends AbstractInfoExtractor { HttpResponse response = client.execute(request); msg = EntityUtils.toString(response.getEntity()); } catch (Exception e) { - logger.warn("Failed to fetch http response. Retry={}", retry_times, e); + logger.warn("Failed to fetch http response. Retry={}", retryTimes, e); } finally { request.releaseConnection(); } diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java new file mode 100644 index 0000000..6b45548 --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/SparkEnvInfoExtractor.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.kylin.tool.extractor; + +import com.google.common.base.Preconditions; +import org.apache.commons.io.FileUtils; +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.OptionsHelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; + +public class SparkEnvInfoExtractor extends AbstractInfoExtractor { + private static final Logger logger = LoggerFactory.getLogger(SparkEnvInfoExtractor.class); + + public SparkEnvInfoExtractor(){ + super(); + packageType = "spark"; + } + + private File getSparkConfDir() { + String sparkHome = KylinConfig.getSparkHome(); + File sparkHomeDir = new File(sparkHome); + Preconditions.checkArgument(sparkHomeDir.exists(), "Your SPARK_HOME does not exist."); + return new File(sparkHomeDir, "conf"); + } + + private File getHadoopConfDir() { + String hadoopConf = System.getenv("HADOOP_CONF_DIR"); + // maybe add a user defined env setting in kylin.properties + Preconditions.checkNotNull(hadoopConf, "Cannot find HADOOP_CONF_DIR in the environment."); + File hadoopConfDir = new File(hadoopConf); + Preconditions.checkArgument(hadoopConfDir.exists(), "Your HADOOP_CONF_DIR does not exist: " + hadoopConf); + return hadoopConfDir; + } + + private void extractConfDir(File from, File to) { + File[] confFiles = from.listFiles(); + if (confFiles != null) { + for (File confFile : confFiles) { + if (!confFile.getName().endsWith(".template")) { + addFile(confFile, to); + } + } + } + } + + @Override + protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception { + StringBuilder envStrBuilder = new StringBuilder(); + + // extract spark configurations + try { + File sparkHome = getSparkConfDir(); + extractConfDir(sparkHome, new File(exportDir, "spark-conf")); + envStrBuilder.append("SPARK_HOME=").append(sparkHome.getAbsolutePath()).append("\n"); + } catch (Exception e) { + logger.error("Failed to extract spark conf: error={}", e.getMessage()); + } + + // extract hadoop configurations for spark + try { + File hadoopConf = getHadoopConfDir(); + extractConfDir(hadoopConf, new File(exportDir, "hadoop-conf")); + envStrBuilder.append("HADOOP_CONF_DIR=").append(hadoopConf.getAbsolutePath()).append("\n"); + } catch (Exception e) { + logger.error("Failed to extract hadoop conf: error={}", e.getMessage()); + } + + // extract spark env variables + FileUtils.write(new File(exportDir, "env"), envStrBuilder.toString()); + } +} diff --git a/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java b/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java new file mode 100644 index 0000000..658872d --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/extractor/YarnLogExtractor.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.kylin.tool.extractor; + + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.io.FileUtils; +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.job.constant.ExecutableConstants; +import org.apache.kylin.job.dao.ExecutableDao; +import org.apache.kylin.job.dao.ExecutablePO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * http://hadoop.apache.org/docs/r2.7.3/hadoop-yarn/hadoop-yarn-site/YarnCommands.html#logs + */ +public class YarnLogExtractor { + private static final Logger logger = LoggerFactory.getLogger(YarnLogExtractor.class); + List<String> yarnLogsResources = Lists.newArrayList(); + private KylinConfig kylinConfig; + private ExecutableDao executableDao; + + public void extractYarnLogAndMRJob(String jobId, File yarnLogDir) throws Exception { + logger.info("Collecting Yarn logs and MR counters for job {}", jobId); + kylinConfig = KylinConfig.getInstanceFromEnv(); + executableDao = ExecutableDao.getInstance(kylinConfig); + ExecutablePO executablePO = executableDao.getJob(jobId); + + if (executablePO == null) { + logger.error("Cannot find executablePO for job {}.", jobId); + return; + } + + for (ExecutablePO task : executablePO.getTasks()) { + yarnLogsResources.add(task.getUuid()); + } + + for (String stepId : yarnLogsResources) { + logger.info("Checking step {}", stepId); + extractYarnLog(stepId, new File(yarnLogDir, stepId)); + extractMRJob(stepId, new File(yarnLogDir, stepId)); + } + } + + + protected void extractMRJob(String taskId, File destDir) { + try { + final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo(); + String jobId = null; + if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) { + jobId = jobInfo.get(ExecutableConstants.MR_JOB_ID); + } else if (taskId.endsWith("00")) { + logger.info("Create Intermediate Flat Hive Table's taskId: " + taskId); + final String jobContent = executableDao.getJobOutput(taskId).getContent(); + if (jobContent != null) { + String applicationId = extractApplicationId(jobContent); + if (applicationId != null) { + jobId = applicationId.replace("application", "job"); + logger.info("jobId is: " + jobId); + } + } + } + + if (jobId != null) { + FileUtils.forceMkdir(destDir); + String[] mrJobArgs = {"-mrJobId", jobId, "-destDir", destDir.getAbsolutePath(), "-compress", "false", + "-submodule", "true"}; + new MrJobInfoExtractor().execute(mrJobArgs); + } + + } catch (Exception e) { + logger.error("Failed to extract MR job.", e); + } + + } + + protected void extractYarnLog(String taskId, File destDir) { + try { + final Map<String, String> jobInfo = executableDao.getJobOutput(taskId).getInfo(); + FileUtils.forceMkdir(destDir); + String appId = null; + if (jobInfo.containsKey(ExecutableConstants.MR_JOB_ID)) { + appId = jobInfo.get(ExecutableConstants.MR_JOB_ID).replace("job", "application"); + } else if (jobInfo.containsKey(ExecutableConstants.SPARK_JOB_ID)) { + appId = jobInfo.get(ExecutableConstants.SPARK_JOB_ID); + } + + if (appId != null) { + // use the application id resolved above; MR_JOB_ID may be absent for Spark steps + extractYarnLogByApplicationId(appId, destDir); + } else if
(taskId.endsWith("00")) { + extractFlatStepInfo(taskId, destDir); + } + } catch (Exception e) { + logger.error("Failed to extract yarn log.", e); + } + + } + + private void extractFlatStepInfo(String taskId, File destDir) { + try { + logger.info("Create Intermediate Flat Hive Table's taskId: " + taskId); + final String jobContent = executableDao.getJobOutput(taskId).getContent(); + if (jobContent != null) { + String applicationId = extractApplicationId(jobContent); + + logger.info("applicationId is: " + applicationId); + if (applicationId != null && applicationId.startsWith("application")) { + logger.info("Create Intermediate Flat Hive Table's applicationId: " + applicationId); + extractYarnLogByApplicationId(applicationId, destDir); + } + } + } catch (Exception e) { + logger.error("Failed to extract FlatStepInfo.", e); + } + } + + private String extractApplicationId(String jobContent) { + Matcher matcher = Pattern.compile("application_[0-9]+[_][0-9]+").matcher(jobContent); + + if (matcher.find()) { + return matcher.group(0); + } + return null; + } + + private void extractYarnLogByApplicationId(String applicationId, File destDir) throws Exception { + if (shouldDoLogCollection(applicationId, kylinConfig)) { + File destFile = new File(destDir, applicationId + ".log"); + String yarnCmd = "yarn logs -applicationId " + applicationId + " > " + destFile.getAbsolutePath(); + logger.info(yarnCmd); + try { + kylinConfig.getCliCommandExecutor().execute(yarnCmd); + } catch (Exception ex) { + logger.warn("Failed to get yarn logs.", ex); + } + } else { + logger.info("Skip this application {}.", applicationId); + } + } + + /** + * Only collect logs for applications that have finished but did not succeed. + */ + public static boolean shouldDoLogCollection(String applicationId, KylinConfig kylinConfig) throws IOException { + final String yarnCmd = "yarn application -status " + applicationId; + final String cmdOutput = kylinConfig.getCliCommandExecutor().execute(yarnCmd).getSecond(); + final Map<String, String> params = Maps.newHashMap(); + final String[] cmdOutputLines = cmdOutput.split("\n"); + for (String cmdOutputLine : cmdOutputLines) { + String[] pair = cmdOutputLine.split(":"); + if (pair.length >= 2) { + params.put(pair[0].trim(), pair[1].trim()); + } + } + for (Map.Entry<String, String> e : params.entrySet()) { + logger.info("Status of {} {} : {}", applicationId, e.getKey(), e.getValue()); + } + + // Skip running applications because log aggregation has not completed yet + if (params.containsKey("State") && params.get("State").equals("RUNNING")) { + return false; + } + // Skip applications that finished successfully + return params.containsKey("Final-State") && !params.get("Final-State").equals("SUCCEEDED"); + } +} diff --git a/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java b/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java index 842beb2..388e569 100644 --- a/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java +++ b/tool/src/main/java/org/apache/kylin/tool/util/ToolUtil.java @@ -19,7 +19,6 @@ package org.apache.kylin.tool.util; -import com.google.common.collect.Maps; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; @@ -28,7 +27,6 @@ import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; -import java.util.Map; public class ToolUtil { @@ -55,17 +53,6 @@ public class ToolUtil { return store.getMetaStoreUUID(); } - public static String
decideKylinMajorVersionFromCommitFile() { - Map<String, String> majorVersionCommitMap = Maps.newHashMap(); - majorVersionCommitMap.put("1.3", "commit.sha1"); - majorVersionCommitMap.put("1.5", "commit_SHA1"); - for (Map.Entry<String, String> majorVersionEntry : majorVersionCommitMap.entrySet()) { - if (new File(KylinConfig.getKylinHome(), majorVersionEntry.getValue()).exists()) { - return majorVersionEntry.getKey(); - } - } - return null; - } public static String getHostName() { String hostname = System.getenv("COMPUTERNAME"); diff --git a/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java b/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java index d2b31b0..37e7c17 100644 --- a/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java +++ b/tool/src/test/java/org/apache/kylin/tool/ClientEnvExtractorTest.java @@ -19,6 +19,7 @@ package org.apache.kylin.tool; import org.apache.kylin.common.util.HBaseMetadataTestCase; +import org.apache.kylin.tool.extractor.ClientEnvExtractor; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -56,7 +57,6 @@ public class ClientEnvExtractorTest extends HBaseMetadataTestCase { ClientEnvExtractor executor = new ClientEnvExtractor(); executor.maxWaitSeconds = 2; executor.addShellOutput("sleep 1000", f.getAbsolutePath(), "testTimeout"); - executor.addShellOutput("pwd", f.getAbsolutePath(), "pwd"); } @Test diff --git a/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java b/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java index d87bfd8..2343301 100644 --- a/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java +++ b/tool/src/test/java/org/apache/kylin/tool/CubeMetaExtractorTest.java @@ -28,6 +28,7 @@ import java.util.Set; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceTool; import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.apache.kylin.tool.extractor.CubeMetaExtractor; import org.junit.After; import org.junit.Assert; import org.junit.Before;
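
For reference, a minimal standalone sketch of the decision rule that the new YarnLogExtractor.shouldDoLogCollection applies to the output of "yarn application -status <applicationId>": running applications are skipped because log aggregation has not finished, successfully finished applications are skipped, and only applications that finished without succeeding have their YARN logs collected. The class name LogCollectionRuleSketch and the sample status strings are illustrative assumptions, not code from this patch.

import java.util.HashMap;
import java.util.Map;

public class LogCollectionRuleSketch {

    // Mirrors the rule in YarnLogExtractor.shouldDoLogCollection, applied to raw status output.
    static boolean shouldCollect(String statusOutput) {
        Map<String, String> params = new HashMap<>();
        for (String line : statusOutput.split("\n")) {
            String[] pair = line.split(":");
            if (pair.length >= 2) {
                params.put(pair[0].trim(), pair[1].trim());
            }
        }
        // Running applications are skipped: log aggregation is not completed yet.
        if ("RUNNING".equals(params.get("State"))) {
            return false;
        }
        // Collect only applications that finished but did not succeed.
        return params.containsKey("Final-State") && !"SUCCEEDED".equals(params.get("Final-State"));
    }

    public static void main(String[] args) {
        // Hypothetical excerpts of "yarn application -status" output.
        String failed = "State : FINISHED\nFinal-State : FAILED";
        String succeeded = "State : FINISHED\nFinal-State : SUCCEEDED";
        String running = "State : RUNNING\nFinal-State : UNDEFINED";
        System.out.println(shouldCollect(failed));    // true  -> collect logs
        System.out.println(shouldCollect(succeeded)); // false -> skip
        System.out.println(shouldCollect(running));   // false -> skip, aggregation not done
    }
}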