This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6172615f707 [SPARK-43457][CONNECT][PYTHON] Augment user agent with OS, Python and Spark versions 6172615f707 is described below commit 6172615f70785b71224ecbc797de2f679ab0d593 Author: Niranjan Jayakar <n...@databricks.com> AuthorDate: Tue May 16 17:36:02 2023 +0900 [SPARK-43457][CONNECT][PYTHON] Augment user agent with OS, Python and Spark versions ### What changes were proposed in this pull request? Augment the user agent string sent over the service to include operating system and Python version. ### Why are the changes needed? Including OS, Python and Spark versions in the user agent improves tracking to see how Spark Connect is used across Python versions and platforms. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests attached. Closes #41138 from nija-at/user-agent-info. Lead-authored-by: Niranjan Jayakar <n...@databricks.com> Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/client.py | 11 ++++++++++- python/pyspark/sql/tests/connect/test_client.py | 6 ++++-- python/pyspark/sql/tests/connect/test_connect_basic.py | 10 ++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py index a2a2cc4cf5e..c1675eac9e1 100644 --- a/python/pyspark/sql/connect/client.py +++ b/python/pyspark/sql/connect/client.py @@ -25,6 +25,7 @@ check_dependencies(__name__) import logging import os +import platform import random import time import urllib.parse @@ -57,6 +58,7 @@ import grpc from google.protobuf import text_format from google.rpc import error_details_pb2 +from pyspark.version import __version__ from pyspark.resource.information import ResourceInformation from pyspark.sql.connect.conversion import storage_level_to_proto, proto_to_storage_level import pyspark.sql.connect.proto as 
pb2 @@ -299,7 +301,14 @@ class ChannelBuilder: raise SparkConnectException( f"'user_agent' parameter should not exceed 2048 characters, found {len} characters." ) - return user_agent + return " ".join( + [ + user_agent, + f"spark/{__version__}", + f"os/{platform.uname().system.lower()}", + f"python/{platform.python_version()}", + ] + ) def get(self, key: str) -> Any: """ diff --git a/python/pyspark/sql/tests/connect/test_client.py b/python/pyspark/sql/tests/connect/test_client.py index 191a5204bf3..722be1e2882 100644 --- a/python/pyspark/sql/tests/connect/test_client.py +++ b/python/pyspark/sql/tests/connect/test_client.py @@ -37,7 +37,7 @@ class SparkConnectClientTestCase(unittest.TestCase): client.execute_command(command) self.assertIsNotNone(mock.req, "ExecutePlan API was not called when expected") - self.assertEqual(mock.req.client_type, "bar") + self.assertRegex(mock.req.client_type, r"^bar spark/[^ ]+ os/[^ ]+ python/[^ ]+$") def test_user_agent_default(self): client = SparkConnectClient("sc://foo/") @@ -48,7 +48,9 @@ class SparkConnectClientTestCase(unittest.TestCase): client.execute_command(command) self.assertIsNotNone(mock.req, "ExecutePlan API was not called when expected") - self.assertEqual(mock.req.client_type, "_SPARK_CONNECT_PYTHON") + self.assertRegex( + mock.req.client_type, r"^_SPARK_CONNECT_PYTHON spark/[^ ]+ os/[^ ]+ python/[^ ]+$" + ) def test_properties(self): client = SparkConnectClient("sc://foo/;token=bar") diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index b0bc2cba78e..8a83d040207 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -3404,13 +3404,15 @@ class ChannelBuilderTests(unittest.TestCase): chan = ChannelBuilder("sc://host/;token=abcs") self.assertTrue(chan.secure, "specifying a token must set the channel to secure") - self.assertEqual(chan.userAgent, 
"_SPARK_CONNECT_PYTHON") + self.assertRegex( + chan.userAgent, r"^_SPARK_CONNECT_PYTHON spark/[^ ]+ os/[^ ]+ python/[^ ]+$" + ) chan = ChannelBuilder("sc://host/;use_ssl=abcs") self.assertFalse(chan.secure, "Garbage in, false out") def test_user_agent(self): chan = ChannelBuilder("sc://host/;user_agent=Agent123%20%2F3.4") - self.assertEqual("Agent123 /3.4", chan.userAgent) + self.assertIn("Agent123 /3.4", chan.userAgent) def test_user_agent_len(self): user_agent = "x" * 2049 @@ -3422,7 +3424,7 @@ class ChannelBuilderTests(unittest.TestCase): user_agent = "%C3%A4" * 341 # "%C3%A4" -> "ä"; (341 * 6 = 2046) < 2048 expected = "ä" * 341 chan = ChannelBuilder(f"sc://host/;user_agent={user_agent}") - self.assertEqual(expected, chan.userAgent) + self.assertIn(expected, chan.userAgent) def test_valid_channel_creation(self): chan = ChannelBuilder("sc://host").toChannel() @@ -3438,7 +3440,7 @@ class ChannelBuilderTests(unittest.TestCase): def test_channel_properties(self): chan = ChannelBuilder("sc://host/;use_ssl=true;token=abc;user_agent=foo;param1=120%2021") self.assertEqual("host:15002", chan.endpoint) - self.assertEqual("foo", chan.userAgent) + self.assertIn("foo", chan.userAgent.split(" ")) self.assertEqual(True, chan.secure) self.assertEqual("120 21", chan.get("param1")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org