This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 8181260  [SPARK-36906][PYTHON] Inline type hints for conf.py and observation.py in python/pyspark/sql
8181260 is described below

commit 81812606cc32a41863c86695c4710b0e914b7e3c
Author:     Xinrong Meng <xinrong.m...@databricks.com>
AuthorDate: Mon Oct 4 14:32:25 2021 -0700

    [SPARK-36906][PYTHON] Inline type hints for conf.py and observation.py in python/pyspark/sql

    ### What changes were proposed in this pull request?
    Inline type hints for conf.py and observation.py in python/pyspark/sql.

    ### Why are the changes needed?
    Currently, there are type hint stub files (*.pyi) that show the expected types for functions, but we can also take advantage of static type checking within the functions by inlining the type hints.

    ### Does this PR introduce _any_ user-facing change?
    No. It includes a docstring typo fix: `Metrics are aggregation expressions, which are applied to the DataFrame while **is** is being` is changed to `Metrics are aggregation expressions, which are applied to the DataFrame while **it** is being`.

    ### How was this patch tested?
    Existing tests.

    Closes #34159 from xinrong-databricks/inline_conf_observation.

    Authored-by: Xinrong Meng <xinrong.m...@databricks.com>
    Signed-off-by: Takuya UESHIN <ues...@databricks.com>
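[Editor's illustration] To make the stub-versus-inline distinction concrete: with a stub file, a type checker sees only the signatures in the separate .pyi and never checks the implementation; once the hints are inlined and the stub is deleted, the checker can verify the function body as well. A minimal sketch, using a hypothetical greet function that is not part of this patch:

    # greet.pyi (stub): the checker sees only this signature; the
    # untyped body in greet.py itself goes unchecked
    def greet(name: str) -> str: ...

    # greet.py after inlining the hints and deleting greet.pyi: the
    # same signature now lives in the source, so mypy also verifies
    # that the body really accepts and returns str
    def greet(name: str) -> str:
        return "Hello, " + name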
""" @@ -49,25 +52,25 @@ class RuntimeConfig(object): return self._jconf.get(key, default) @since(2.0) - def unset(self, key): + def unset(self, key: str) -> None: """Resets the configuration property for the given key.""" self._jconf.unset(key) - def _checkType(self, obj, identifier): + def _checkType(self, obj: Any, identifier: str) -> None: """Assert that an object is of type str.""" if not isinstance(obj, str): raise TypeError("expected %s '%s' to be a string (was '%s')" % (identifier, obj, type(obj).__name__)) @since(2.4) - def isModifiable(self, key): + def isModifiable(self, key: str) -> bool: """Indicates whether the configuration property with the given key is modifiable in the current session. """ return self._jconf.isModifiable(key) -def _test(): +def _test() -> None: import os import doctest from pyspark.sql.session import SparkSession diff --git a/python/pyspark/sql/conf.pyi b/python/pyspark/sql/conf.pyi deleted file mode 100644 index 3e88f84..0000000 --- a/python/pyspark/sql/conf.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Optional -from py4j.java_gateway import JavaObject # type: ignore[import] - -class RuntimeConfig: - def __init__(self, jconf: JavaObject) -> None: ... - def set(self, key: str, value: str) -> None: ... - def get(self, key: str, default: Optional[str] = ...) -> str: ... - def unset(self, key: str) -> None: ... - def isModifiable(self, key: str) -> bool: ... diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py index 55c0922..3e8a0d1 100644 --- a/python/pyspark/sql/observation.py +++ b/python/pyspark/sql/observation.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import Any, Dict, Optional + from pyspark.sql import column from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame @@ -24,7 +26,7 @@ __all__ = ["Observation"] class Observation: """Class to observe (named) metrics on a :class:`DataFrame`. - Metrics are aggregation expressions, which are applied to the DataFrame while is is being + Metrics are aggregation expressions, which are applied to the DataFrame while it is being processed by an action. The metrics have the following guarantees: @@ -61,7 +63,7 @@ class Observation: >>> observation.get {'count': 2, 'max(age)': 5} """ - def __init__(self, name=None): + def __init__(self, name: Optional[str] = None) -> None: """Constructs a named or unnamed Observation instance. 
diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py
index 55c0922..3e8a0d1 100644
--- a/python/pyspark/sql/observation.py
+++ b/python/pyspark/sql/observation.py
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import Any, Dict, Optional
+
 from pyspark.sql import column
 from pyspark.sql.column import Column
 from pyspark.sql.dataframe import DataFrame
@@ -24,7 +26,7 @@ __all__ = ["Observation"]
 class Observation:
     """Class to observe (named) metrics on a :class:`DataFrame`.
 
-    Metrics are aggregation expressions, which are applied to the DataFrame while is is being
+    Metrics are aggregation expressions, which are applied to the DataFrame while it is being
     processed by an action.
 
     The metrics have the following guarantees:
@@ -61,7 +63,7 @@ class Observation:
     >>> observation.get
     {'count': 2, 'max(age)': 5}
     """
-    def __init__(self, name=None):
+    def __init__(self, name: Optional[str] = None) -> None:
         """Constructs a named or unnamed Observation instance.
 
         Parameters
@@ -78,7 +80,7 @@ class Observation:
         self._jvm = None
         self._jo = None
 
-    def _on(self, df, *exprs):
+    def _on(self, df: DataFrame, *exprs: Column) -> DataFrame:
         """Attaches this observation to the given :class:`DataFrame` to observe aggregations.
 
         Parameters
@@ -97,16 +99,18 @@ class Observation:
         assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column"
         assert self._jo is None, "an Observation can be used with a DataFrame only once"
 
-        self._jvm = df._sc._jvm
-        cls = self._jvm.org.apache.spark.sql.Observation
+        self._jvm = df._sc._jvm  # type: ignore[assignment]
+        cls = self._jvm.org.apache.spark.sql.Observation  # type: ignore[attr-defined]
         self._jo = cls(self._name) if self._name is not None else cls()
-        observed_df = self._jo.on(df._jdf,
-                                  exprs[0]._jc,
-                                  column._to_seq(df._sc, [c._jc for c in exprs[1:]]))
+        observed_df = self._jo.on(  # type: ignore[attr-defined]
+            df._jdf,
+            exprs[0]._jc,
+            column._to_seq(df._sc, [c._jc for c in exprs[1:]])  # type: ignore[attr-defined]
+        )
         return DataFrame(observed_df, df.sql_ctx)
 
     @property
-    def get(self):
+    def get(self) -> Dict[str, Any]:
         """Get the observed metrics.
 
         Waits until the observed dataset finishes its first action. Only the result of the
@@ -123,7 +127,7 @@ class Observation:
         return {k: v for k, v in jmap.items()}
 
 
-def _test():
+def _test() -> None:
     import doctest
     import sys
     from pyspark.context import SparkContext
diff --git a/python/pyspark/sql/observation.pyi b/python/pyspark/sql/observation.pyi
deleted file mode 100644
index 8709300..0000000
--- a/python/pyspark/sql/observation.pyi
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Optional, Dict, Any
-
-from py4j.java_gateway import JavaObject  # type: ignore[import]
-
-
-class Observation:
-    def __init__(self, name: Optional[str] = ...): ...
-    @property
-    def get(self) -> Dict[str, Any]: ...
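[Editor's illustration] Finally, a usage sketch for the annotated Observation class, following the doctest excerpted in the observation.py hunks above; it assumes the Observation export from pyspark.sql that ships alongside this class, and the data and metric names mirror the docstring example:

    from pyspark.sql import Observation, SparkSession
    from pyspark.sql.functions import col, count, lit, max

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([["Alice", 2], ["Bob", 5]], ["name", "age"])

    observation = Observation("my metrics")
    # DataFrame.observe attaches the metrics via Observation._on
    observed_df = df.observe(observation, count(lit(1)).alias("count"), max(col("age")))
    observed_df.count()      # metrics become available once an action runs
    print(observation.get)   # {'count': 2, 'max(age)': 5}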