This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new bd627503f96 [SPARK-45485][CONNECT] User agent improvements: Use SPARK_CONNECT_USER_AGENT env variable and include environment specific attributes bd627503f96 is described below commit bd627503f96758edae028a269b7a6ac203a8d941 Author: Robert Dillitz <robert.dill...@databricks.com> AuthorDate: Tue Oct 17 19:32:29 2023 +0900 [SPARK-45485][CONNECT] User agent improvements: Use SPARK_CONNECT_USER_AGENT env variable and include environment specific attributes ### What changes were proposed in this pull request? With this PR similar to the [Python client](https://github.com/apache/spark/blob/2cc1ee4d3a05a641d7a245f015ef824d8f7bae8b/python/pyspark/sql/connect/client/core.py#L284) the Scala client's user agent now: 1. Uses the SPARK_CONNECT_USER_AGENT environment variable if set 2. Includes the OS, JVM version, Scala version, and Spark version ### Why are the changes needed? Feature parity with the Python client. Better observability of Scala Spark Connect clients. ### Does this PR introduce _any_ user-facing change? By default, the user agent string now contains more useful information. Before: `_SPARK_CONNECT_SCALA` After: `_SPARK_CONNECT_SCALA spark/4.0.0-SNAPSHOT scala/2.13.12 jvm/17.0.8.1 os/darwin` ### How was this patch tested? Tests added & adjusted. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43313 from dillitz/user-agent-improvements. 
Authored-by: Robert Dillitz <robert.dill...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../SparkConnectClientBuilderParseTestSuite.scala | 8 +++--- .../connect/client/SparkConnectClientSuite.scala | 13 ++++++++-- .../sql/connect/client/SparkConnectClient.scala | 30 +++++++++++++++++++--- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala index e1d4a18d0ff..68d2e86b19d 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientBuilderParseTestSuite.scala @@ -47,7 +47,7 @@ class SparkConnectClientBuilderParseTestSuite extends ConnectFunSuite { argumentTest("token", "azbycxdwev1234567890", _.token.get) argumentTest("user_id", "U1238", _.userId.get) argumentTest("user_name", "alice", _.userName.get) - argumentTest("user_agent", "MY APP", _.userAgent) + argumentTest("user_agent", "robert", _.userAgent.split(" ")(0)) argumentTest("session_id", UUID.randomUUID().toString, _.sessionId.get) test("Argument - remote") { @@ -95,7 +95,7 @@ class SparkConnectClientBuilderParseTestSuite extends ConnectFunSuite { "Q12") assert(builder.host === "localhost") assert(builder.port === 1507) - assert(builder.userAgent === "U8912") + assert(builder.userAgent.contains("U8912")) assert(!builder.sslEnabled) assert(builder.token.isEmpty) assert(builder.userId.contains("Q12")) @@ -113,7 +113,7 @@ class SparkConnectClientBuilderParseTestSuite extends ConnectFunSuite { "cluster=mycl") assert(builder.host === "localhost") assert(builder.port === 15002) - assert(builder.userAgent == "_SPARK_CONNECT_SCALA") + 
assert(builder.userAgent.contains("_SPARK_CONNECT_SCALA")) assert(builder.sslEnabled) assert(builder.token.isEmpty) assert(builder.userId.isEmpty) @@ -124,7 +124,7 @@ class SparkConnectClientBuilderParseTestSuite extends ConnectFunSuite { val builder = build("--token", "thisismysecret") assert(builder.host === "localhost") assert(builder.port === 15002) - assert(builder.userAgent === "_SPARK_CONNECT_SCALA") + assert(builder.userAgent.contains("_SPARK_CONNECT_SCALA")) assert(builder.sslEnabled) assert(builder.token.contains("thisismysecret")) assert(builder.userId.isEmpty) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala index a3df39da4a8..b3ff4eb0bb2 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala @@ -270,7 +270,7 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach { TestPackURI( "sc://host:123/;user_agent=a945", isCorrect = true, - client => assert(client.userAgent == "a945")), + client => assert(client.userAgent.contains("a945"))), TestPackURI("scc://host:12", isCorrect = false), TestPackURI("http://host", isCorrect = false), TestPackURI("sc:/host:1234/path", isCorrect = false), @@ -290,7 +290,16 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach { TestPackURI("sc://host:123/;token=mySecretToken;use_ssl=true", isCorrect = true), TestPackURI("sc://host:123/;use_ssl=false;token=mySecretToken", isCorrect = false), TestPackURI("sc://host:123/;token=mySecretToken;use_ssl=false", isCorrect = false), - TestPackURI("sc://host:123/;param1=value1;param2=value2", isCorrect = true)) + TestPackURI("sc://host:123/;param1=value1;param2=value2", 
isCorrect = true), + TestPackURI( + "sc://SPARK-45486", + isCorrect = true, + client => { + assert(client.userAgent.contains("spark/")) + assert(client.userAgent.contains("scala/")) + assert(client.userAgent.contains("jvm/")) + assert(client.userAgent.contains("os/")) + })) private def checkTestPack(testPack: TestPackURI): Unit = { val client = SparkConnectClient.builder().connectionString(testPack.connectionString).build() diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index 86ed2a29667..42ace003da8 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -18,15 +18,17 @@ package org.apache.spark.sql.connect.client import java.net.URI -import java.util.UUID +import java.util.{Locale, UUID} import java.util.concurrent.Executor import scala.collection.mutable import scala.jdk.CollectionConverters._ +import scala.util.Properties import com.google.protobuf.ByteString import io.grpc._ +import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION} import org.apache.spark.connect.proto import org.apache.spark.connect.proto.UserContext import org.apache.spark.sql.connect.common.ProtoUtils @@ -466,7 +468,7 @@ object SparkConnectClient { def userAgent(value: String): Builder = { require(value != null) - _configuration = _configuration.copy(userAgent = value) + _configuration = _configuration.copy(userAgent = genUserAgent(value)) this } @@ -585,6 +587,27 @@ object SparkConnectClient { def build(): SparkConnectClient = _configuration.toSparkConnectClient } + /** + * Appends the Spark, Scala & JVM version, and the used OS to the user-provided user agent. 
+ */ + private def genUserAgent(value: String): String = { + val scalaVersion = Properties.versionNumberString + val jvmVersion = System.getProperty("java.version").split("_")(0) + val osName = { + val os = System.getProperty("os.name").toLowerCase(Locale.ROOT) + if (os.contains("mac")) "darwin" + else if (os.contains("linux")) "linux" + else if (os.contains("win")) "windows" + else "unknown" + } + List( + value, + s"spark/$SPARK_VERSION", + s"scala/$scalaVersion", + s"jvm/$jvmVersion", + s"os/$osName").mkString(" ") + } + /** * Helper class that fully captures the configuration for a [[SparkConnectClient]]. */ @@ -596,7 +619,8 @@ object SparkConnectClient { token: Option[String] = None, isSslEnabled: Option[Boolean] = None, metadata: Map[String, String] = Map.empty, - userAgent: String = DEFAULT_USER_AGENT, + userAgent: String = genUserAgent( + sys.env.getOrElse("SPARK_CONNECT_USER_AGENT", DEFAULT_USER_AGENT)), retryPolicy: GrpcRetryHandler.RetryPolicy = GrpcRetryHandler.RetryPolicy(), useReattachableExecute: Boolean = true, interceptors: List[ClientInterceptor] = List.empty, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org