This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 098f226  [SPARK-33730][PYTHON] Standardize warning types
098f226 is described below

commit 098f2268e4ad43dd9453ada91161ea428dd57d16
Author:     zero323 <mszymkiew...@gmail.com>
AuthorDate: Mon Jan 18 09:32:55 2021 +0900

    [SPARK-33730][PYTHON] Standardize warning types

    ### What changes were proposed in this pull request?

    This PR:

    - Adds a small hierarchy of warnings to be used in PySpark applications.
      These extend built-in classes and a top-level `PySparkWarning`.
    - Replaces `DeprecationWarnings` (intended for developers) with
      PySpark-specific subclasses of `FutureWarning` (intended for end users).

    ### Why are the changes needed?

    - To be more precise, and to give users additional control (on top of the
      standard module-level filters) over how PySpark warnings are handled.
    - To correct the semantics: at the moment we use `DeprecationWarning` in
      user-facing APIs, but it is intended "for warnings about deprecated
      features when those warnings are intended for other Python developers".

    ### Does this PR introduce _any_ user-facing change?

    Yes. Code can raise a different type of warning than before.

    ### How was this patch tested?

    Existing tests.

    Closes #30985 from zero323/SPARK-33730.

    Authored-by: zero323 <mszymkiew...@gmail.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
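The diff below replaces `DeprecationWarning` with the built-in `FutureWarning` directly, so the hierarchy described in the PR text is not visible in this excerpt. As an illustration only, here is a minimal sketch of what such a hierarchy could look like; every name except `PySparkWarning` is hypothetical, not taken from the PR:

```python
import warnings


class PySparkWarning(Warning):
    """Hypothetical top-level class for PySpark-specific warnings."""


class PySparkDeprecationWarning(PySparkWarning, FutureWarning):
    """Hypothetical user-facing deprecation warning.

    Subclassing FutureWarning (rather than DeprecationWarning) matters
    because Python displays FutureWarning by default, while
    DeprecationWarning is hidden unless triggered from __main__.
    """


# Emitting it works exactly like emitting a built-in warning; filters that
# match FutureWarning or PySparkWarning also match this subclass.
warnings.warn("Deprecated in 3.1.0.", PySparkDeprecationWarning)
```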
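Because the warnings are now `FutureWarning`s, end users get the extra control mentioned above through the standard `warnings` machinery. A minimal, self-contained sketch (the RDD and the function are made up for the example; `mapPartitionsWithSplit` is one of the deprecated APIs touched below):

```python
import warnings

from pyspark import SparkContext

sc = SparkContext.getOrCreate()
rdd = sc.parallelize([1, 2, 3, 4], numSlices=2)


def f(split_index, iterator):
    # mapPartitionsWithSplit hands the function the partition index
    # along with an iterator over that partition's elements.
    yield split_index


# The deprecated API now emits a FutureWarning, which Python shows by
# default; suppress it for this call only, without touching global filters:
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=FutureWarning)
    print(rdd.mapPartitionsWithSplit(f).sum())  # 0 + 1 = 1
```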
---
 python/pyspark/ml/clustering.py        |  2 +-
 python/pyspark/mllib/classification.py |  2 +-
 python/pyspark/mllib/regression.py     |  7 ++++---
 python/pyspark/rdd.py                  | 10 ++++++----
 python/pyspark/sql/catalog.py          |  6 ++++--
 python/pyspark/sql/column.py           |  6 ++++--
 python/pyspark/sql/context.py          | 15 ++++++++++-----
 python/pyspark/sql/dataframe.py        |  4 +++-
 python/pyspark/sql/functions.py        |  6 +++---
 python/pyspark/worker.py               | 10 ++++++++--
 10 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 54c1a43..60726cb 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -821,7 +821,7 @@ class BisectingKMeansModel(JavaModel, _BisectingKMeansParams, JavaMLWritable, Ja
         """
         warnings.warn("Deprecated in 3.0.0. It will be removed in future versions. Use "
                       "ClusteringEvaluator instead. You can also get the cost on the training "
-                      "dataset in the summary.", DeprecationWarning)
+                      "dataset in the summary.", FutureWarning)
         return self._call_java("computeCost", dataset)
 
     @property
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index bd43e91..5705401 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -324,7 +324,7 @@ class LogisticRegressionWithSGD(object):
         """
         warnings.warn(
             "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
-            "LogisticRegressionWithLBFGS.", DeprecationWarning)
+            "LogisticRegressionWithLBFGS.", FutureWarning)
 
         def train(rdd, i):
             return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index c224e38..3908e4a 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -299,7 +299,7 @@ class LinearRegressionWithSGD(object):
             (default: 0.001)
         """
         warnings.warn(
-            "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning)
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", FutureWarning)
 
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -453,7 +453,8 @@ class LassoWithSGD(object):
         warnings.warn(
             "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
             "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.",
-            DeprecationWarning)
+            FutureWarning
+        )
 
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -607,7 +608,7 @@ class RidgeRegressionWithSGD(object):
         warnings.warn(
             "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
             "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
-            "LinearRegression.", DeprecationWarning)
+            "LinearRegression.", FutureWarning)
 
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 34faaac..3fed2bc 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -48,7 +48,6 @@ from pyspark.shuffle import Aggregator, ExternalMerger, \
 from pyspark.traceback_utils import SCCallSiteSync
 from pyspark.util import fail_on_stopiteration, _parse_memory
 
-
 __all__ = ["RDD"]
 
@@ -448,8 +447,10 @@ class RDD(object):
         >>> rdd.mapPartitionsWithSplit(f).sum()
         6
         """
-        warnings.warn("mapPartitionsWithSplit is deprecated; "
-                      "use mapPartitionsWithIndex instead", DeprecationWarning, stacklevel=2)
+        warnings.warn(
+            "mapPartitionsWithSplit is deprecated; use mapPartitionsWithIndex instead",
+            FutureWarning, stacklevel=2
+        )
         return self.mapPartitionsWithIndex(f, preservesPartitioning)
 
     def getNumPartitions(self):
@@ -960,7 +961,8 @@ class RDD(object):
         warnings.warn(
             "Deprecated in 3.1, Use pyspark.InheritableThread with "
             "the pinned thread mode enabled.",
-            DeprecationWarning)
+            FutureWarning
+        )
 
         with SCCallSiteSync(self.context) as css:
             sock_info = self.ctx._jvm.PythonRDD.collectAndServeWithJobGroup(
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 70d68a0..d4b7d4e 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -153,7 +153,8 @@ class Catalog(object):
         """
         warnings.warn(
             "createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self.createTable(tableName, path, source, schema, **options)
 
     def createTable(
@@ -251,7 +252,8 @@ class Catalog(object):
         """
         warnings.warn(
             "Deprecated in 2.3.0. Use spark.udf.register instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self._sparkSession.udf.register(name, f, returnType)
 
     @since(2.0)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 7608054..391ee5e 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -323,7 +323,8 @@ class Column(object):
             "A column as 'key' in getItem is deprecated as of Spark 3.0, and will not "
             "be supported in the future release. Use `column[key]` or `column.key` syntax "
             "instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self[key]
 
     def getField(self, name):
@@ -354,7 +355,8 @@ class Column(object):
             "A column as 'name' in getField is deprecated as of Spark 3.0, and will not "
             "be supported in the future release. Use `column[name]` or `column.name` syntax "
             "instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self[name]
 
     def withField(self, fieldName, col):
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 416bbde..ade82da 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -76,7 +76,8 @@ class SQLContext(object):
         if sparkSession is None:
             warnings.warn(
                 "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
-                DeprecationWarning)
+                FutureWarning
+            )
 
         self._sc = sparkContext
         self._jsc = self._sc._jsc
@@ -123,7 +124,8 @@ class SQLContext(object):
         """
         warnings.warn(
             "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
 
         if (cls._instantiatedContext is None
                 or SQLContext._instantiatedContext._sc._jsc is None):
@@ -229,7 +231,8 @@ class SQLContext(object):
         """
         warnings.warn(
             "Deprecated in 2.3.0. Use spark.udf.register instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self.sparkSession.udf.register(name, f, returnType)
 
     def registerJavaFunction(self, name, javaClassName, returnType=None):
@@ -243,7 +246,8 @@ class SQLContext(object):
         """
         warnings.warn(
             "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         return self.sparkSession.udf.registerJavaFunction(name, javaClassName, returnType)
 
     # TODO(andrew): delete this once we refactor things to take in SparkSession
@@ -597,7 +601,8 @@ class HiveContext(SQLContext):
         warnings.warn(
             "HiveContext is deprecated in Spark 2.0.0. Please use " +
             "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
-            DeprecationWarning)
+            FutureWarning
+        )
         if jhiveContext is None:
             sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
             sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index fe7d26d..e09e87c6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -135,7 +135,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         >>> spark.catalog.dropTempView("people")
         """
         warnings.warn(
-            "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning)
+            "Deprecated in 2.0, use createOrReplaceTempView instead.",
+            FutureWarning
+        )
         self._jdf.createOrReplaceTempView(name)
 
     def createTempView(self, name):
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index c9d24dc..45dbedf 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -476,7 +476,7 @@ def toDegrees(col):
     .. deprecated:: 2.1.0
         Use :func:`degrees` instead.
     """
-    warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning)
+    warnings.warn("Deprecated in 2.1, use degrees instead.", FutureWarning)
     return degrees(col)
 
 
@@ -486,7 +486,7 @@ def toRadians(col):
     .. deprecated:: 2.1.0
         Use :func:`radians` instead.
     """
-    warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning)
+    warnings.warn("Deprecated in 2.1, use radians instead.", FutureWarning)
     return radians(col)
 
 
@@ -795,7 +795,7 @@ def approxCountDistinct(col, rsd=None):
     .. deprecated:: 2.1.0
         Use :func:`approx_count_distinct` instead.
     """
-    warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning)
+    warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", FutureWarning)
     return approx_count_distinct(col, rsd)
""" - warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning) + warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", FutureWarning) return approx_count_distinct(col, rsd) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 8ca4bb3..3808bc3 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -21,7 +21,7 @@ Worker that receives input from Piped RDD. import os import sys import time -from inspect import getfullargspec +from inspect import currentframe, getframeinfo, getfullargspec import importlib # 'resource' is a Unix specific module. has_resource_module = True @@ -30,6 +30,7 @@ try: except ImportError: has_resource_module = False import traceback +import warnings from pyspark.accumulators import _accumulatorRegistry from pyspark.broadcast import Broadcast, _broadcastRegistry @@ -500,7 +501,12 @@ def main(infile, outfile): except (resource.error, OSError, ValueError) as e: # not all systems support resource limits, so warn instead of failing - print("WARN: Failed to set memory limit: {0}\n".format(e), file=sys.stderr) + print(warnings.formatwarning( + "Failed to set memory limit: {0}".format(e), + ResourceWarning, + __file__, + getframeinfo(currentframe()).lineno + ), file=sys.stderr) # initialize global state taskContext = None --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org