This is an automated email from the ASF dual-hosted git repository.
abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4d964df5918 HIVE-28569: ReExecuteLostAMQueryPlugin doesn't kick in if
the message contains line break and diagnostics (#5499) (Laszlo Bodor reviewed
by Denys Kuzmenko)
4d964df5918 is described below
commit 4d964df5918e36c7cf51d923db895fcce0258ca8
Author: Bodor Laszlo <[email protected]>
AuthorDate: Thu Oct 10 13:19:17 2024 +0200
HIVE-28569: ReExecuteLostAMQueryPlugin doesn't kick in if the message
contains line break and diagnostics (#5499) (Laszlo Bodor reviewed by Denys
Kuzmenko)
---
.../hadoop/hive/ql/reexec/ReExecuteLostAMQueryPlugin.java | 2 +-
.../hadoop/hive/ql/reexec/TestReExecuteLostAMQueryPlugin.java | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecuteLostAMQueryPlugin.java b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecuteLostAMQueryPlugin.java
index c3a6c8a363b..8e2b6adbbc9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecuteLostAMQueryPlugin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecuteLostAMQueryPlugin.java
@@ -39,7 +39,7 @@ public class ReExecuteLostAMQueryPlugin implements IReExecutionPlugin {
// Lost am container have exit code -100, due to node failures. This pattern of exception is thrown when
// AM is managed by HS2.
private static final Pattern LOST_AM_CONTAINER_ERROR_PATTERN =
- Pattern.compile(".*AM Container for .* exited .* exitCode: -100.*");
+ Pattern.compile(".*AM Container for .* exited .* exitCode: -100.*", Pattern.DOTALL);
// When HS2 does not manage the AMs, tez AMs are registered with zookeeper and HS2 discovers it,
// failure of unmanaged AMs will throw AM record not being found in zookeeper.
private static final String UNMANAGED_AM_FAILURE = "AM record not found (likely died)";
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/reexec/TestReExecuteLostAMQueryPlugin.java b/ql/src/test/org/apache/hadoop/hive/ql/reexec/TestReExecuteLostAMQueryPlugin.java
index 1d29f324945..da926fe4484 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/reexec/TestReExecuteLostAMQueryPlugin.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/reexec/TestReExecuteLostAMQueryPlugin.java
@@ -31,6 +31,16 @@ public class TestReExecuteLostAMQueryPlugin {
testReExecuteWithExceptionMessage("AM record not found (likely died)");
}
+ @Test
+ public void testRetryOnLostAmContainerMessageWithLineBreak() throws Exception {
+ testReExecuteWithExceptionMessage("Application application_1728328561547_0042 failed 1 times (global limit =5; " +
+ "local limit is =1) due to AM Container for appattempt_1728328561547_0042_000001 exited with exitCode: " +
+ "-100\nFailing this attempt.Diagnostics: Container released on a *lost* nodeFor more detailed output, check " +
+ "the application tracking page: https://v2h0231.sjc.cloudera.com:8090/cluster/app/application_1728328561547_0042" +
+ " Then click on links to logs of each attempt.\n" +
+ ". Failing the application.");
+ }
+
@Test
public void testRetryOnNoCurrentDAGException() throws Exception {
testReExecuteWithExceptionMessage("No running DAG at present");