HyukjinKwon commented on code in PR #38485:
URL: https://github.com/apache/spark/pull/38485#discussion_r1012749805


##########
python/pyspark/sql/connect/client.py:
##########
@@ -42,6 +43,140 @@
 logging.basicConfig(level=logging.INFO)
 
 
+class ChannelBuilder:
+    """
+    This is a helper class that is used to create a GRPC channel based on the given
+    connection string per the documentation of Spark Connect.
+
+    Examples
+    --------
+    >>> cb =  ChannelBuilder("sc://localhost")
+    ... cb.endpoint
+    "localhost:15002
+
+
+    >>> cb = ChannelBuilder("sc://localhost/;use_ssl=true;token=aaa")
+    ... cb.secure
+    True
+
+    .. versionadded:: 3.4.0
+
+    """
+
+    PARAM_USE_SSL = "use_ssl"
+    PARAM_TOKEN = "token"
+    PARAM_USER_ID = "user_id"
+
+    DEFAULT_PORT = 15002
+
+    def __init__(self, url: str) -> None:
+        # Explicitly check the scheme of the URL.
+        if url[:5] != "sc://":
+            raise AttributeError("URL scheme must be set to `sc`.")
+        # Rewrite the URL to use http as the scheme so that we can leverage
+        # Python's built-in parser.
+        tmp_url = "http" + url[2:]
+        self.url = urllib.parse.urlparse(tmp_url)
+        self.params: typing.Dict[str, str] = {}
+        if len(self.url.path) > 0 and self.url.path != "/":
+            raise AttributeError(
+                f"Path component for connection URI must be empty: {self.url.path}"
+            )
+        self._extract_attributes()
+
+    def _extract_attributes(self) -> None:
+        if len(self.url.params) > 0:
+            parts = self.url.params.split(";")
+            for p in parts:
+                kv = p.split("=")
+                if len(kv) != 2:
+                    raise AttributeError(f"Parameter '{p}' is not a valid parameter key-value pair")
+                self.params[kv[0]] = urllib.parse.unquote(kv[1])
+
+        netloc = self.url.netloc.split(":")
+        if len(netloc) == 1:
+            self.host = netloc[0]
+            self.port = ChannelBuilder.DEFAULT_PORT
+        elif len(netloc) == 2:
+            self.host = netloc[0]
+            self.port = int(netloc[1])
+        else:
+            raise AttributeError(
+                f"Target destination {self.url.netloc} does not match '<host>:<port>' pattern"
+            )
+
+    def metadata(self) -> typing.Iterable[typing.Tuple[str, str]]:
+        """
+        Builds the GRPC specific metadata list to be injected into the request. All
+        parameters will be converted to metadata except ones that are explicitly used
+        by the channel.
+
+        Returns
+        -------
+        A list of tuples (key, value)
+        """
+        return [
+            (k, self.params[k])
+            for k in self.params
+            if k
+            not in [
+                ChannelBuilder.PARAM_TOKEN,
+                ChannelBuilder.PARAM_USE_SSL,
+                ChannelBuilder.PARAM_USER_ID,
+            ]
+        ]
+
+    @property
+    def secure(self) -> bool:
+        value = self.params.get(ChannelBuilder.PARAM_USE_SSL, "")
+        return value.lower() == "true"
+
+    @property
+    def endpoint(self) -> str:
+        return f"{self.host}:{self.port}"
+
+    def get(self, key: str) -> Any:
+        """
+        Parameters
+        ----------
+        key : str
+            Parameter key name.
+
+        Returns
+        -------
+        The parameter value if present, raises exception otherwise.
+        """
+        return self.params[key]
+
+    def to_channel(self) -> grpc.Channel:
+        """
+        Applies the parameters of the connection string and creates a new
+        GRPC channel according to the configuration.
+
+        Returns
+        -------
+        GRPC Channel instance.
+        """
+        destination = f"{self.host}:{self.port}"
+        if not self.secure:
+            if self.params.get(ChannelBuilder.PARAM_TOKEN, None) is not None:
+                raise AttributeError("Token based authentication cannot be used without TLS")
+            print("insecure channel")

Review Comment:
   We should probably remove the `print` here and use `warnings.warn` instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to