This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bd627503f96 [SPARK-45485][CONNECT] User agent improvements: Use 
SPARK_CONNECT_USER_AGENT env variable and include environment specific 
attributes
bd627503f96 is described below

commit bd627503f96758edae028a269b7a6ac203a8d941
Author: Robert Dillitz <robert.dill...@databricks.com>
AuthorDate: Tue Oct 17 19:32:29 2023 +0900

    [SPARK-45485][CONNECT] User agent improvements: Use 
SPARK_CONNECT_USER_AGENT env variable and include environment specific 
attributes
    
    ### What changes were proposed in this pull request?
    With this PR, similar to the [Python client](https://github.com/apache/spark/blob/2cc1ee4d3a05a641d7a245f015ef824d8f7bae8b/python/pyspark/sql/connect/client/core.py#L284), the Scala client's user agent now:
    
    1. Uses the SPARK_CONNECT_USER_AGENT environment variable if set
    2. Includes the OS, JVM version, Scala version, and Spark version
    
    ### Why are the changes needed?
    Feature parity with the Python client. Better observability of Scala Spark 
Connect clients.
    
    ### Does this PR introduce _any_ user-facing change?
    By default, the user agent string now contains more useful information.
    Before:
    `_SPARK_CONNECT_SCALA`
    After:
    `_SPARK_CONNECT_SCALA spark/4.0.0-SNAPSHOT scala/2.13.12 jvm/17.0.8.1 
os/darwin`
    
    ### How was this patch tested?
    Tests added & adjusted.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #43313 from dillitz/user-agent-improvements.
    
    Authored-by: Robert Dillitz <robert.dill...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../SparkConnectClientBuilderParseTestSuite.scala  |  8 +++---
 .../connect/client/SparkConnectClientSuite.scala   | 13 ++++++++--
 .../sql/connect/client/SparkConnectClient.scala    | 30 +++++++++++++++++++---
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala
 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala
index e1d4a18d0ff..68d2e86b19d 100644
--- 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala
+++ 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala
@@ -47,7 +47,7 @@ class SparkConnectClientBuilderParseTestSuite extends 
ConnectFunSuite {
   argumentTest("token", "azbycxdwev1234567890", _.token.get)
   argumentTest("user_id", "U1238", _.userId.get)
   argumentTest("user_name", "alice", _.userName.get)
-  argumentTest("user_agent", "MY APP", _.userAgent)
+  argumentTest("user_agent", "robert", _.userAgent.split(" ")(0))
   argumentTest("session_id", UUID.randomUUID().toString, _.sessionId.get)
 
   test("Argument - remote") {
@@ -95,7 +95,7 @@ class SparkConnectClientBuilderParseTestSuite extends 
ConnectFunSuite {
         "Q12")
       assert(builder.host === "localhost")
       assert(builder.port === 1507)
-      assert(builder.userAgent === "U8912")
+      assert(builder.userAgent.contains("U8912"))
       assert(!builder.sslEnabled)
       assert(builder.token.isEmpty)
       assert(builder.userId.contains("Q12"))
@@ -113,7 +113,7 @@ class SparkConnectClientBuilderParseTestSuite extends 
ConnectFunSuite {
         "cluster=mycl")
       assert(builder.host === "localhost")
       assert(builder.port === 15002)
-      assert(builder.userAgent == "_SPARK_CONNECT_SCALA")
+      assert(builder.userAgent.contains("_SPARK_CONNECT_SCALA"))
       assert(builder.sslEnabled)
       assert(builder.token.isEmpty)
       assert(builder.userId.isEmpty)
@@ -124,7 +124,7 @@ class SparkConnectClientBuilderParseTestSuite extends 
ConnectFunSuite {
       val builder = build("--token", "thisismysecret")
       assert(builder.host === "localhost")
       assert(builder.port === 15002)
-      assert(builder.userAgent === "_SPARK_CONNECT_SCALA")
+      assert(builder.userAgent.contains("_SPARK_CONNECT_SCALA"))
       assert(builder.sslEnabled)
       assert(builder.token.contains("thisismysecret"))
       assert(builder.userId.isEmpty)
diff --git 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
index a3df39da4a8..b3ff4eb0bb2 100644
--- 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
+++ 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala
@@ -270,7 +270,7 @@ class SparkConnectClientSuite extends ConnectFunSuite with 
BeforeAndAfterEach {
     TestPackURI(
       "sc://host:123/;user_agent=a945",
       isCorrect = true,
-      client => assert(client.userAgent == "a945")),
+      client => assert(client.userAgent.contains("a945"))),
     TestPackURI("scc://host:12", isCorrect = false),
    TestPackURI("http://host", isCorrect = false),
     TestPackURI("sc:/host:1234/path", isCorrect = false),
@@ -290,7 +290,16 @@ class SparkConnectClientSuite extends ConnectFunSuite with 
BeforeAndAfterEach {
     TestPackURI("sc://host:123/;token=mySecretToken;use_ssl=true", isCorrect = 
true),
     TestPackURI("sc://host:123/;use_ssl=false;token=mySecretToken", isCorrect 
= false),
     TestPackURI("sc://host:123/;token=mySecretToken;use_ssl=false", isCorrect 
= false),
-    TestPackURI("sc://host:123/;param1=value1;param2=value2", isCorrect = 
true))
+    TestPackURI("sc://host:123/;param1=value1;param2=value2", isCorrect = 
true),
+    TestPackURI(
+      "sc://SPARK-45486",
+      isCorrect = true,
+      client => {
+        assert(client.userAgent.contains("spark/"))
+        assert(client.userAgent.contains("scala/"))
+        assert(client.userAgent.contains("jvm/"))
+        assert(client.userAgent.contains("os/"))
+      }))
 
   private def checkTestPack(testPack: TestPackURI): Unit = {
     val client = 
SparkConnectClient.builder().connectionString(testPack.connectionString).build()
diff --git 
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
 
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
index 86ed2a29667..42ace003da8 100644
--- 
a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
+++ 
b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala
@@ -18,15 +18,17 @@
 package org.apache.spark.sql.connect.client
 
 import java.net.URI
-import java.util.UUID
+import java.util.{Locale, UUID}
 import java.util.concurrent.Executor
 
 import scala.collection.mutable
 import scala.jdk.CollectionConverters._
+import scala.util.Properties
 
 import com.google.protobuf.ByteString
 import io.grpc._
 
+import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION}
 import org.apache.spark.connect.proto
 import org.apache.spark.connect.proto.UserContext
 import org.apache.spark.sql.connect.common.ProtoUtils
@@ -466,7 +468,7 @@ object SparkConnectClient {
 
     def userAgent(value: String): Builder = {
       require(value != null)
-      _configuration = _configuration.copy(userAgent = value)
+      _configuration = _configuration.copy(userAgent = genUserAgent(value))
       this
     }
 
@@ -585,6 +587,27 @@ object SparkConnectClient {
     def build(): SparkConnectClient = _configuration.toSparkConnectClient
   }
 
+  /**
+   * Appends the Spark, Scala & JVM version, and the used OS to the 
user-provided user agent.
+   */
+  private def genUserAgent(value: String): String = {
+    val scalaVersion = Properties.versionNumberString
+    val jvmVersion = System.getProperty("java.version").split("_")(0)
+    val osName = {
+      val os = System.getProperty("os.name").toLowerCase(Locale.ROOT)
+      if (os.contains("mac")) "darwin"
+      else if (os.contains("linux")) "linux"
+      else if (os.contains("win")) "windows"
+      else "unknown"
+    }
+    List(
+      value,
+      s"spark/$SPARK_VERSION",
+      s"scala/$scalaVersion",
+      s"jvm/$jvmVersion",
+      s"os/$osName").mkString(" ")
+  }
+
   /**
    * Helper class that fully captures the configuration for a 
[[SparkConnectClient]].
    */
@@ -596,7 +619,8 @@ object SparkConnectClient {
       token: Option[String] = None,
       isSslEnabled: Option[Boolean] = None,
       metadata: Map[String, String] = Map.empty,
-      userAgent: String = DEFAULT_USER_AGENT,
+      userAgent: String = genUserAgent(
+        sys.env.getOrElse("SPARK_CONNECT_USER_AGENT", DEFAULT_USER_AGENT)),
       retryPolicy: GrpcRetryHandler.RetryPolicy = 
GrpcRetryHandler.RetryPolicy(),
       useReattachableExecute: Boolean = true,
       interceptors: List[ClientInterceptor] = List.empty,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to