[ https://issues.apache.org/jira/browse/HIVE-27295?focusedWorklogId=859112&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-859112 ]
ASF GitHub Bot logged work on HIVE-27295: ----------------------------------------- Author: ASF GitHub Bot Created on: 26/Apr/23 09:11 Start Date: 26/Apr/23 09:11 Worklog Time Spent: 10m Work Description: zabetak commented on code in PR #4268: URL: https://github.com/apache/hive/pull/4268#discussion_r1177577590 ########## itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java: ########## @@ -99,41 +97,54 @@ private ProcessResults runCmd(String[] cmd, long secondsToWait) reader = new BufferedReader(new InputStreamReader(proc.getErrorStream())); final StringBuilder errLines = new StringBuilder(); reader.lines().forEach(s -> errLines.append(s).append('\n')); - LOG.info("Result size: " + lines.length() + ";" + errLines.length()); + LOG.info("Result lines#: {}(stdout);{}(stderr)",lines.length(), errLines.length()); return new ProcessResults(lines.toString(), errLines.toString(), proc.exitValue()); } - private int runCmdAndPrintStreams(String[] cmd, long secondsToWait) + private ProcessResults runCmdAndPrintStreams(String[] cmd, long secondsToWait) throws InterruptedException, IOException { ProcessResults results = runCmd(cmd, secondsToWait); LOG.info("Stdout from proc: " + results.stdout); LOG.info("Stderr from proc: " + results.stderr); - return results.rc; + return results; } public void launchDockerContainer() throws Exception { runCmdAndPrintStreams(buildRmCmd(), 600); - if (runCmdAndPrintStreams(buildRunCmd(), 600) != 0) { - throw new RuntimeException("Unable to start docker container"); + if (runCmdAndPrintStreams(buildRunCmd(), 600).rc != 0) { + printDockerEvents(); + throw new RuntimeException("Unable to start docker container"); } long startTime = System.currentTimeMillis(); ProcessResults pr; do { Thread.sleep(1000); - pr = runCmd(buildLogCmd(), 30); + pr = runCmdAndPrintStreams(buildLogCmd(), 30); if (pr.rc != 0) { - throw new RuntimeException("Failed to get docker logs"); + printDockerEvents(); + throw new RuntimeException("Failed to get docker logs"); } } while (startTime + MAX_STARTUP_WAIT >= System.currentTimeMillis() && !isContainerReady(pr)); if (startTime + MAX_STARTUP_WAIT < System.currentTimeMillis()) { - throw new RuntimeException("Container failed to be ready in " + MAX_STARTUP_WAIT/1000 + - " seconds"); + printDockerEvents(); + throw new RuntimeException( + String.format("Container initialization failed within %d seconds. Please check the hive logs.", + MAX_STARTUP_WAIT / 1000)); } - } + } + + public void printDockerEvents() { Review Comment: Do we need this `public`? Issue Time Tracking ------------------- Worklog Id: (was: 859112) Time Spent: 1.5h (was: 1h 20m) > Improve docker logging in AbstractExternalDB > -------------------------------------------- > > Key: HIVE-27295 > URL: https://issues.apache.org/jira/browse/HIVE-27295 > Project: Hive > Issue Type: Improvement > Reporter: László Bodor > Assignee: László Bodor > Priority: Major > Labels: pull-request-available > Time Spent: 1.5h > Remaining Estimate: 0h > > 1. While waiting for docker container to start properly, we should print the > output of docker logs command in every loop, otherwise we can miss important > information about the actual startup process if the docker container was oom > killed in the meantime. Not to mention the fact that we're currently not > logging the output at all in case of an error: > https://github.com/apache/hive/blob/59058c65457fb7ab9d8575a555034e6633962661/itests/util/src/main/java/org/apache/hadoop/hive/ql/externalDB/AbstractExternalDB.java#L125-L127 > 2. We can include the output for docker events in the logs in case of an > error (like: oom killed container), which might contain useful information. > We can have info like this: > {code} > 2023-04-25T08:47:08.852515314-07:00 container oom > 2ba12cd9cd844bb30b3158564bd68cd97f25e7a05172d111713ac9f7c1c0b1d4 > (image=harbor.rke-us-west-04.kc.cloudera.com/docker_private_cache/cloudera_thirdparty/postgres:9.3, > name=qtestExternalDB-PostgresExternalDB) > 2023-04-25T08:47:08.893742200-07:00 container die > 2ba12cd9cd844bb30b3158564bd68cd97f25e7a05172d111713ac9f7c1c0b1d4 (exitCode=1, > image=harbor.rke-us-west-04.kc.cloudera.com/docker_private_cache/cloudera_thirdparty/postgres:9.3, > name=qtestExternalDB-PostgresExternalDB) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)