Yikun commented on a change in pull request #33174: URL: https://github.com/apache/spark/pull/33174#discussion_r662283934
########## File path: python/run-tests.py ########## @@ -40,6 +42,99 @@ from sparktestsupport.shellutils import which, subprocess_check_output # noqa from sparktestsupport.modules import all_modules, pyspark_sql # noqa +# Make sure logging config before any possible logging print +logging.basicConfig(stream=sys.stdout, format="%(message)s") +LOGGER = logging.getLogger() + + +def _get_module_from_name(name): + __import__(name) + return sys.modules[name] + + +def _discover_python_unittests(paths, discover_slow=False): + """Discover the python module which contains unittests under paths. + + Such as: + ['pyspark/tests'], it will return the set of module name under the path of pyspark/tests, like + {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...} + + Parameters + ---------- + paths : list + Paths of modules to be discovered. + discover_slow : bool + If True, will only discover slow tests + If False, will discover all tests except slow tests + + Returns + ------- + A set of complete test module name discovered under specified paths + + >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])]) + ... # doctest: +NORMALIZE_WHITESPACE + ['pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', 'pyspark.tests.test_conf', + 'pyspark.tests.test_context', 'pyspark.tests.test_daemon', 'pyspark.tests.test_install_spark', + 'pyspark.tests.test_join', 'pyspark.tests.test_pin_thread', 'pyspark.tests.test_profiler', + 'pyspark.tests.test_rdd', 'pyspark.tests.test_rddbarrier', 'pyspark.tests.test_readwrite', + 'pyspark.tests.test_serializers', 'pyspark.tests.test_shuffle', + 'pyspark.tests.test_taskcontext', 'pyspark.tests.test_util', 'pyspark.tests.test_worker'] + >>> sorted([x for x in _discover_python_unittests([("pyspark/pandas/tests", "slow")])]) + ... 
# doctest: +NORMALIZE_WHITESPACE + ['pyspark.pandas.tests.indexes.test_base', 'pyspark.pandas.tests.indexes.test_datetime', + 'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby', + 'pyspark.pandas.tests.test_indexing', 'pyspark.pandas.tests.test_ops_on_diff_frames', + 'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 'pyspark.pandas.tests.test_series', + 'pyspark.pandas.tests.test_stats'] + """ + + def add_test_module(testcases, modules, slow): + """Append the testcases module names to modules set""" + if isinstance(testcases, Iterable): + for test_case in testcases: + add_test_module(test_case, modules, slow) + else: + name = testcases.__module__ + module = _get_module_from_name(name) + if slow and hasattr(module, 'is_slow_test'): + modules.add(name) + if not slow and not hasattr(module, 'is_slow_test'): + modules.add(name) + + if not paths: + return [] + modules = set() + pyspark_path = os.path.join(SPARK_HOME, "python") + for path in paths: + if isinstance(path, tuple) and len(path) >= 2 and path[1] == "slow": + discover_slow = True + path = path[0] + # Discover the unittest in every path + testcases = unittest.defaultTestLoader.discover( + os.path.join(pyspark_path, path), + top_level_dir=pyspark_path + ) + if not unittest.defaultTestLoader.errors: + add_test_module(testcases, modules, discover_slow) + else: + # unittest discover need all deps of PySpark + for error in unittest.defaultTestLoader.errors: + print(error) + raise Exception("Discover unittest failed, Please make sure you have install all deps " + "of PySpark.") Review comment: So that doctest discovery can later reuse the same module traversal, it is better to use pkgutil.walk_packages, and to add `_contain_unittests_class` to make sure only modules containing unittests are loaded.
When we want to add doctests in the future, we could add something like `_contain_doctests_class`: ```python pyspark_path = os.path.join(SPARK_HOME, "python") real_path = os.path.join(pyspark_path, path) prefix = path.replace('/', '.') # Travel the module under the real_path for importer, module_name, ispkg in pkgutil.walk_packages([real_path], prefix=prefix+'.'): if _contain_unittests_class(module_name, slow_only): modules.add(module_name) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org