This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dc28391d455 HIVE-28661: OTEL: Latency in retrieving query end time leads to thread interruption (#5576). (Tanishq Chugh, reviewed by Ayush Saxena)
dc28391d455 is described below

commit dc28391d455425b5adf47f47f5f4f9873dc1973d
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu Dec 12 22:09:23 2024 +0530

    HIVE-28661: OTEL: Latency in retrieving query end time leads to thread interruption (#5576). (Tanishq Chugh, reviewed by Ayush Saxena)
---
 .../apache/hive/service/servlet/OTELExporter.java  | 93 ++++++++++++----------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
index 56aac3fc34b..bd70b179e25 100644
--- a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
+++ b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java
@@ -65,8 +65,13 @@ public class OTELExporter extends Thread {
   @Override
   public void run() {
     while (true) {
-      jvmMetrics.setJvmMetrics();
-      exposeMetricsToOTEL();
+      try {
+        jvmMetrics.setJvmMetrics();
+        exposeMetricsToOTEL();
+      } catch (Throwable e) {
+        LOG.error("Exception occurred in OTELExporter thread ", e);
+      }
+      
       try {
         Thread.sleep(frequency);
       } catch (InterruptedException e) {
@@ -136,55 +141,57 @@ public class OTELExporter extends Thread {
 
     Set<String> historicalQueryIDs = new HashSet<>();
     for (QueryInfo hQuery : historicalQueries) {
-      String hQueryId = hQuery.getQueryDisplay().getQueryId();
-      historicalQueryIDs.add(hQueryId);
-      Span rootspan = queryIdToSpanMap.remove(hQueryId);
-      Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
+      if (hQuery.getEndTime() != null) {
+        String hQueryId = hQuery.getQueryDisplay().getQueryId();
+        historicalQueryIDs.add(hQueryId);
+        Span rootspan = queryIdToSpanMap.remove(hQueryId);
+        Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
+
+        //For queries that were live till last loop but have ended before 
start of this loop
+        if (rootspan != null) {
+          for (QueryDisplay.TaskDisplay task : 
hQuery.getQueryDisplay().getTaskDisplays()) {
+            if (!completedTasks.contains(task.getTaskId())) {
+              Context parentContext = Context.current().with(rootspan);
+              tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
+                      
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
+                      .setStartTimestamp(task.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan()
+                      .end(task.getEndTime(), TimeUnit.MILLISECONDS);
+            }
+          }
+  
+          //Update the rootSpan name & attributes before ending it
+          rootspan.updateName(hQueryId + " - 
completed").setAllAttributes(addQueryAttributes(hQuery))
+                  .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
+          historicalQueryId.add(hQueryId);
+        }
 
-      //For queries that were live till last loop but have ended before start 
of this loop
-      if (rootspan != null) {
-        for (QueryDisplay.TaskDisplay task : 
hQuery.getQueryDisplay().getTaskDisplays()) {
-          if (!completedTasks.contains(task.getTaskId())) {
-            Context parentContext = Context.current().with(rootspan);
+        //For queries that already ended either before OTEL service started or 
in between OTEL loops
+        if (historicalQueryId.add(hQueryId)) {
+          rootspan = tracer.spanBuilder(hQueryId + " - completed")
+                  .setStartTimestamp(hQuery.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan();
+          Context parentContext = Context.current().with(rootspan);
+          
+          Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
+                  .setStartTimestamp(hQuery.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan()
+                  .setAttribute("QueryId", hQueryId)
+                  .setAttribute("QueryString", 
hQuery.getQueryDisplay().getQueryString())
+                  .setAttribute("UserName", hQuery.getUserName())
+                  .setAttribute("ExecutionEngine", 
hQuery.getExecutionEngine());
+          if (hQuery.getQueryDisplay().getErrorMessage() != null) {
+            initSpan.setAttribute("ErrorMessage", 
hQuery.getQueryDisplay().getErrorMessage());
+          }
+          initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);
+
+          for (QueryDisplay.TaskDisplay task : 
hQuery.getQueryDisplay().getTaskDisplays()) {
+            parentContext = Context.current().with(rootspan);
             tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
                     
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
                     .setStartTimestamp(task.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan()
                     .end(task.getEndTime(), TimeUnit.MILLISECONDS);
           }
-        }
-
-        //Update the rootSpan name & attributes before ending it
-        rootspan.updateName(hQueryId + " - 
completed").setAllAttributes(addQueryAttributes(hQuery))
-                .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
-        historicalQueryId.add(hQueryId);
-      }
 
-      //For queries that already ended either before OTEL service started or 
in between OTEL loops
-      if (historicalQueryId.add(hQueryId)) {
-        rootspan = tracer.spanBuilder(hQueryId + " - completed")
-                .setStartTimestamp(hQuery.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan();
-        Context parentContext = Context.current().with(rootspan);
-
-        Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
-                .setStartTimestamp(hQuery.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan()
-                .setAttribute("QueryId", hQueryId)
-                .setAttribute("QueryString", 
hQuery.getQueryDisplay().getQueryString())
-                .setAttribute("UserName", hQuery.getUserName())
-                .setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
-        if (hQuery.getQueryDisplay().getErrorMessage() != null) {
-          initSpan.setAttribute("ErrorMessage", 
hQuery.getQueryDisplay().getErrorMessage());
+          
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), 
TimeUnit.MILLISECONDS);
         }
-        initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);
-
-        for (QueryDisplay.TaskDisplay task : 
hQuery.getQueryDisplay().getTaskDisplays()) {
-          parentContext = Context.current().with(rootspan);
-          tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
-                  
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
-                  .setStartTimestamp(task.getBeginTime(), 
TimeUnit.MILLISECONDS).startSpan()
-                  .end(task.getEndTime(), TimeUnit.MILLISECONDS);
-        }
-        
-        
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), 
TimeUnit.MILLISECONDS);
       }
     }
     

Reply via email to