Yikun commented on a change in pull request #33174: URL: https://github.com/apache/spark/pull/33174#discussion_r663592408
########## File path: python/run-tests.py ########## @@ -40,6 +44,111 @@ from sparktestsupport.shellutils import which, subprocess_check_output # noqa from sparktestsupport.modules import all_modules, pyspark_sql # noqa +# Make sure logging config before any possible logging print +logging.basicConfig(stream=sys.stdout, format="%(message)s") +LOGGER = logging.getLogger() + + +def _contain_unittests_class(module_name, slow=False): + """ + Check if the module with specific module has classes are derived from unittest.TestCase. + Such as: + pyspark.tests.test_appsubmit, it will return True, because there is SparkSubmitTests which is + included under the module of pyspark.tests.test_appsubmit, inherits from unittest.TestCase. + + Parameters + ---------- + module_name : str + The module name to be check + slow : bool + Return True if module contains unittests and is_slow_test is marked as True. + + Returns + ------- + True if contains unittest classes otherwise False. An ``ModuleNotFoundError`` will raise if the + module is not found. + + >>> _contain_unittests_class("pyspark.tests.test_appsubmit") + True + >>> _contain_unittests_class("pyspark.conf") + False + >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe", slow=True) + True + >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe") + False + """ + module = import_module(module_name) + for _, _class in inspect.getmembers(module, inspect.isclass): + if issubclass(_class, unittest.TestCase): + if slow and hasattr(module, 'is_slow_test'): + return True + if not slow and not hasattr(module, 'is_slow_test'): + return True + return False + + +def _discover_python_unittests(paths): + """Discover the python module which contains unittests under paths. 
+ + Such as: + ['pyspark/tests'], it will return the set of module name under the path of pyspark/tests, like + {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...} + + Parameters + ---------- + paths : list + Paths of modules to be discovered. + + Returns + ------- + A set of complete test module name discovered under specified paths + + >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])]) Review comment: Yes, this doctest is to make sure _discover_python_unittests works in a real environment. For example, if we forgot to add `pyspark.pandas.tests.test_series`, the CI would fail with an error like: ```Python ********************************************************************** File "./python/run-tests.py", line 116, in __main__._discover_python_unittests Failed example: sorted([x for x in _discover_python_unittests([("pyspark/pandas/tests", "slow")])]) # doctest: +NORMALIZE_WHITESPACE Expected: ['pyspark.pandas.tests.indexes.test_base', 'pyspark.pandas.tests.indexes.test_datetime', 'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby', 'pyspark.pandas.tests.test_indexing', 'pyspark.pandas.tests.test_ops_on_diff_frames', 'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 'pyspark.pandas.tests.test_series'] Got: ['pyspark.pandas.tests.indexes.test_base', 'pyspark.pandas.tests.indexes.test_datetime', 'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby', 'pyspark.pandas.tests.test_indexing', 'pyspark.pandas.tests.test_ops_on_diff_frames', 'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 'pyspark.pandas.tests.test_series', 'pyspark.pandas.tests.test_series'] ********************************************************************** ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org