This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new dc28391d455 HIVE-28661: OTEL: Latency in retrieving query end time
leads to thread interruption (#5576). (Tanishq Chugh, reviewed by Ayush Saxena)
dc28391d455 is described below
commit dc28391d455425b5adf47f47f5f4f9873dc1973d
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu Dec 12 22:09:23 2024 +0530
HIVE-28661: OTEL: Latency in retrieving query end time leads to thread
interruption (#5576). (Tanishq Chugh, reviewed by Ayush Saxena)
---
.../apache/hive/service/servlet/OTELExporter.java | 93 ++++++++++++----------
1 file changed, 50 insertions(+), 43 deletions(-)
diff --git a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
index 56aac3fc34b..bd70b179e25 100644
--- a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
+++ b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
@@ -65,8 +65,13 @@ public class OTELExporter extends Thread {
@Override
public void run() {
while (true) {
- jvmMetrics.setJvmMetrics();
- exposeMetricsToOTEL();
+ try {
+ jvmMetrics.setJvmMetrics();
+ exposeMetricsToOTEL();
+ } catch (Throwable e) {
+ LOG.error("Exception occurred in OTELExporter thread ", e);
+ }
+
try {
Thread.sleep(frequency);
} catch (InterruptedException e) {
@@ -136,55 +141,57 @@ public class OTELExporter extends Thread {
Set<String> historicalQueryIDs = new HashSet<>();
for (QueryInfo hQuery : historicalQueries) {
- String hQueryId = hQuery.getQueryDisplay().getQueryId();
- historicalQueryIDs.add(hQueryId);
- Span rootspan = queryIdToSpanMap.remove(hQueryId);
- Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
+ if (hQuery.getEndTime() != null) {
+ String hQueryId = hQuery.getQueryDisplay().getQueryId();
+ historicalQueryIDs.add(hQueryId);
+ Span rootspan = queryIdToSpanMap.remove(hQueryId);
+ Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
+
+ //For queries that were live till last loop but have ended before start of this loop
+ if (rootspan != null) {
+ for (QueryDisplay.TaskDisplay task :
hQuery.getQueryDisplay().getTaskDisplays()) {
+ if (!completedTasks.contains(task.getTaskId())) {
+ Context parentContext = Context.current().with(rootspan);
+ tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
+
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
+ .setStartTimestamp(task.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan()
+ .end(task.getEndTime(), TimeUnit.MILLISECONDS);
+ }
+ }
+
+ //Update the rootSpan name & attributes before ending it
+ rootspan.updateName(hQueryId + " -
completed").setAllAttributes(addQueryAttributes(hQuery))
+ .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
+ historicalQueryId.add(hQueryId);
+ }
- //For queries that were live till last loop but have ended before start of this loop
- if (rootspan != null) {
- for (QueryDisplay.TaskDisplay task :
hQuery.getQueryDisplay().getTaskDisplays()) {
- if (!completedTasks.contains(task.getTaskId())) {
- Context parentContext = Context.current().with(rootspan);
+ //For queries that already ended either before OTEL service started or in between OTEL loops
+ if (historicalQueryId.add(hQueryId)) {
+ rootspan = tracer.spanBuilder(hQueryId + " - completed")
+ .setStartTimestamp(hQuery.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan();
+ Context parentContext = Context.current().with(rootspan);
+
+ Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
+ .setStartTimestamp(hQuery.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan()
+ .setAttribute("QueryId", hQueryId)
+ .setAttribute("QueryString",
hQuery.getQueryDisplay().getQueryString())
+ .setAttribute("UserName", hQuery.getUserName())
+ .setAttribute("ExecutionEngine",
hQuery.getExecutionEngine());
+ if (hQuery.getQueryDisplay().getErrorMessage() != null) {
+ initSpan.setAttribute("ErrorMessage",
hQuery.getQueryDisplay().getErrorMessage());
+ }
+ initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);
+
+ for (QueryDisplay.TaskDisplay task :
hQuery.getQueryDisplay().getTaskDisplays()) {
+ parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}
- }
-
- //Update the rootSpan name & attributes before ending it
- rootspan.updateName(hQueryId + " -
completed").setAllAttributes(addQueryAttributes(hQuery))
- .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
- historicalQueryId.add(hQueryId);
- }
- //For queries that already ended either before OTEL service started or in between OTEL loops
- if (historicalQueryId.add(hQueryId)) {
- rootspan = tracer.spanBuilder(hQueryId + " - completed")
- .setStartTimestamp(hQuery.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan();
- Context parentContext = Context.current().with(rootspan);
-
- Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
- .setStartTimestamp(hQuery.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan()
- .setAttribute("QueryId", hQueryId)
- .setAttribute("QueryString",
hQuery.getQueryDisplay().getQueryString())
- .setAttribute("UserName", hQuery.getUserName())
- .setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
- if (hQuery.getQueryDisplay().getErrorMessage() != null) {
- initSpan.setAttribute("ErrorMessage",
hQuery.getQueryDisplay().getErrorMessage());
+
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(),
TimeUnit.MILLISECONDS);
}
- initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);
-
- for (QueryDisplay.TaskDisplay task :
hQuery.getQueryDisplay().getTaskDisplays()) {
- parentContext = Context.current().with(rootspan);
- tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
-
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
- .setStartTimestamp(task.getBeginTime(),
TimeUnit.MILLISECONDS).startSpan()
- .end(task.getEndTime(), TimeUnit.MILLISECONDS);
- }
-
-
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(),
TimeUnit.MILLISECONDS);
}
}