This is an automated email from the ASF dual-hosted git repository.

ptoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2764ee0e932 [SPARK-45805][SQL] Make `withOrigin` more generic
2764ee0e932 is described below

commit 2764ee0e9329a68ef10b6dc79fec20c722aaaf96
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Mon Nov 6 21:18:43 2023 +0100

    [SPARK-45805][SQL] Make `withOrigin` more generic
    
    ### What changes were proposed in this pull request?
    In this PR, I propose changing the implementation of `sql.withOrigin` to 
eliminate the magic number 3 from which the algorithm starts iterating. The new 
implementation starts from index 0 and finds the first block of Spark 
traces. It stops immediately after that block, at the first non-Spark trace. For 
example:
    
    <img width="862" alt="Screenshot 2023-11-01 at 21 29 18" 
src="https://github.com/apache/spark/assets/1580697/f80cf071-34b5-4d56-9f1c-c5bb28c87cab">
    
    The new implementation finds the block of Spark traces [2, 4], stops at 
index 5 (the first non-Spark trace after the block), and returns the slice of 
traces [4, 6), i.e. the elements at indices 4 and 5.
    
    ### Why are the changes needed?
    The PR makes `withOrigin` more generic and improves code maintenance.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    By existing test suites, for instance:
    ```
    $ build/sbt "test:testOnly *DatasetSuite"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #43671 from MaxGekk/refactor-withOrigin.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Peter Toth <peter.t...@gmail.com>
---
 .../spark/sql/catalyst/trees/QueryContexts.scala       | 18 ++++++++++--------
 .../src/main/scala/org/apache/spark/sql/package.scala  |  8 +++++---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
index b8288b24535..8d885d07ca8 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
@@ -160,14 +160,16 @@ case class DataFrameQueryContext(
 
 object DataFrameQueryContext {
   def apply(elements: Array[StackTraceElement]): DataFrameQueryContext = {
-    val methodName = elements(0).getMethodName
-    val code = if (methodName.length > 1 && methodName(0) == '$') {
-      methodName.substring(1)
-    } else {
-      methodName
-    }
-    val callSite = elements(1).toString
+    val fragment = elements.headOption.map { firstElem =>
+      val methodName = firstElem.getMethodName
+      if (methodName.length > 1 && methodName(0) == '$') {
+        methodName.substring(1)
+      } else {
+        methodName
+      }
+    }.getOrElse("")
+    val callSite = elements.tail.headOption.map(_.toString).getOrElse("")
 
-    DataFrameQueryContext(code, callSite)
+    DataFrameQueryContext(fragment, callSite)
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index 7f00f6d6317..96bef83af0a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -98,10 +98,12 @@ package object sql {
       f
     } else {
       val st = Thread.currentThread().getStackTrace
-      var i = 3
+      var i = 0
+      // Find the beginning of Spark code traces
+      while (i < st.length && !sparkCode(st(i))) i += 1
+      // Stop at the end of the first Spark code traces
       while (i < st.length && sparkCode(st(i))) i += 1
-      val origin =
-        Origin(stackTrace = Some(Thread.currentThread().getStackTrace.slice(i - 1, i + 1)))
+      val origin = Origin(stackTrace = Some(st.slice(i - 1, i + 1)))
       CurrentOrigin.withOrigin(origin)(f)
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to