This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6172615f707 [SPARK-43457][CONNECT][PYTHON] Augment user agent with OS, Python and Spark versions
6172615f707 is described below

commit 6172615f70785b71224ecbc797de2f679ab0d593
Author: Niranjan Jayakar <n...@databricks.com>
AuthorDate: Tue May 16 17:36:02 2023 +0900

    [SPARK-43457][CONNECT][PYTHON] Augment user agent with OS, Python and Spark versions
    
    ### What changes were proposed in this pull request?
    
    Augment the user agent string sent to the service to include the
    operating system, Python version, and Spark version.
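
    For example, a client that previously sent just the default
    "_SPARK_CONNECT_PYTHON" user agent would now send something like
    this (the versions shown are illustrative):

        _SPARK_CONNECT_PYTHON spark/3.5.0.dev0 os/linux python/3.10.12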
    
    ### Why are the changes needed?
    
    Including the OS, Python, and Spark versions in the user agent makes
    it easier to track how Spark Connect is used across Spark releases,
    Python versions, and platforms.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Unit tests attached.
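
    As a quick way to eyeball the new suffix locally (illustrative, not
    part of this patch; the connection string is a placeholder and the
    printed versions depend on the machine):

        from pyspark.sql.connect.client import ChannelBuilder

        chan = ChannelBuilder("sc://localhost/")
        # Prints e.g. "_SPARK_CONNECT_PYTHON spark/3.5.0.dev0 os/linux python/3.10.12"
        print(chan.userAgent)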
    
    Closes #41138 from nija-at/user-agent-info.
    
    Lead-authored-by: Niranjan Jayakar <n...@databricks.com>
    Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/connect/client.py                   | 11 ++++++++++-
 python/pyspark/sql/tests/connect/test_client.py        |  6 ++++--
 python/pyspark/sql/tests/connect/test_connect_basic.py | 10 ++++++----
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py
index a2a2cc4cf5e..c1675eac9e1 100644
--- a/python/pyspark/sql/connect/client.py
+++ b/python/pyspark/sql/connect/client.py
@@ -25,6 +25,7 @@ check_dependencies(__name__)
 
 import logging
 import os
+import platform
 import random
 import time
 import urllib.parse
@@ -57,6 +58,7 @@ import grpc
 from google.protobuf import text_format
 from google.rpc import error_details_pb2
 
+from pyspark.version import __version__
 from pyspark.resource.information import ResourceInformation
 from pyspark.sql.connect.conversion import storage_level_to_proto, proto_to_storage_level
 import pyspark.sql.connect.proto as pb2
@@ -299,7 +301,14 @@ class ChannelBuilder:
             raise SparkConnectException(
                 f"'user_agent' parameter should not exceed 2048 characters, 
found {len} characters."
             )
-        return user_agent
+        return " ".join(
+            [
+                user_agent,
+                f"spark/{__version__}",
+                f"os/{platform.uname().system.lower()}",
+                f"python/{platform.python_version()}",
+            ]
+        )
 
     def get(self, key: str) -> Any:
         """
diff --git a/python/pyspark/sql/tests/connect/test_client.py b/python/pyspark/sql/tests/connect/test_client.py
index 191a5204bf3..722be1e2882 100644
--- a/python/pyspark/sql/tests/connect/test_client.py
+++ b/python/pyspark/sql/tests/connect/test_client.py
@@ -37,7 +37,7 @@ class SparkConnectClientTestCase(unittest.TestCase):
         client.execute_command(command)
 
         self.assertIsNotNone(mock.req, "ExecutePlan API was not called when expected")
-        self.assertEqual(mock.req.client_type, "bar")
+        self.assertRegex(mock.req.client_type, r"^bar spark/[^ ]+ os/[^ ]+ 
python/[^ ]+$")
 
     def test_user_agent_default(self):
         client = SparkConnectClient("sc://foo/")
@@ -48,7 +48,9 @@ class SparkConnectClientTestCase(unittest.TestCase):
         client.execute_command(command)
 
         self.assertIsNotNone(mock.req, "ExecutePlan API was not called when expected")
-        self.assertEqual(mock.req.client_type, "_SPARK_CONNECT_PYTHON")
+        self.assertRegex(
+            mock.req.client_type, r"^_SPARK_CONNECT_PYTHON spark/[^ ]+ os/[^ ]+ python/[^ ]+$"
+        )
 
     def test_properties(self):
         client = SparkConnectClient("sc://foo/;token=bar")
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index b0bc2cba78e..8a83d040207 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -3404,13 +3404,15 @@ class ChannelBuilderTests(unittest.TestCase):
 
         chan = ChannelBuilder("sc://host/;token=abcs")
         self.assertTrue(chan.secure, "specifying a token must set the channel to secure")
-        self.assertEqual(chan.userAgent, "_SPARK_CONNECT_PYTHON")
+        self.assertRegex(
+            chan.userAgent, r"^_SPARK_CONNECT_PYTHON spark/[^ ]+ os/[^ ]+ python/[^ ]+$"
+        )
         chan = ChannelBuilder("sc://host/;use_ssl=abcs")
         self.assertFalse(chan.secure, "Garbage in, false out")
 
     def test_user_agent(self):
         chan = ChannelBuilder("sc://host/;user_agent=Agent123%20%2F3.4")
-        self.assertEqual("Agent123 /3.4", chan.userAgent)
+        self.assertIn("Agent123 /3.4", chan.userAgent)
 
     def test_user_agent_len(self):
         user_agent = "x" * 2049
@@ -3422,7 +3424,7 @@ class ChannelBuilderTests(unittest.TestCase):
         user_agent = "%C3%A4" * 341  # "%C3%A4" -> "ä"; (341 * 6 = 2046) < 2048
         expected = "ä" * 341
         chan = ChannelBuilder(f"sc://host/;user_agent={user_agent}")
-        self.assertEqual(expected, chan.userAgent)
+        self.assertIn(expected, chan.userAgent)
 
     def test_valid_channel_creation(self):
         chan = ChannelBuilder("sc://host").toChannel()
@@ -3438,7 +3440,7 @@ class ChannelBuilderTests(unittest.TestCase):
     def test_channel_properties(self):
         chan = ChannelBuilder("sc://host/;use_ssl=true;token=abc;user_agent=foo;param1=120%2021")
         self.assertEqual("host:15002", chan.endpoint)
-        self.assertEqual("foo", chan.userAgent)
+        self.assertIn("foo", chan.userAgent.split(" "))
         self.assertEqual(True, chan.secure)
         self.assertEqual("120 21", chan.get("param1"))
 

