Yikun commented on a change in pull request #33174:
URL: https://github.com/apache/spark/pull/33174#discussion_r663592408



##########
File path: python/run-tests.py
##########
@@ -40,6 +44,111 @@
 from sparktestsupport.shellutils import which, subprocess_check_output  # noqa
 from sparktestsupport.modules import all_modules, pyspark_sql  # noqa
 
+# Make sure logging config before any possible logging print
+logging.basicConfig(stream=sys.stdout, format="%(message)s")
+LOGGER = logging.getLogger()
+
+
+def _contain_unittests_class(module_name, slow=False):
+    """
+    Check if the module with the given name contains classes derived from 
unittest.TestCase.
+    Such as:
+    pyspark.tests.test_appsubmit, it will return True, because there is 
SparkSubmitTests which is
+    included under the module of pyspark.tests.test_appsubmit, inherits from 
unittest.TestCase.
+
+    Parameters
+    ----------
+    module_name : str
+        The module name to be checked.
+    slow : bool
+        If True, only return True when the module contains unittests and 
is_slow_test is marked as True.
+
+    Returns
+    -------
+    True if the module contains unittest classes, otherwise False. A 
``ModuleNotFoundError`` will be raised if the
+    module is not found.
+
+    >>> _contain_unittests_class("pyspark.tests.test_appsubmit")
+    True
+    >>> _contain_unittests_class("pyspark.conf")
+    False
+    >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe", 
slow=True)
+    True
+    >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe")
+    False
+    """
+    module = import_module(module_name)
+    for _, _class in inspect.getmembers(module, inspect.isclass):
+        if issubclass(_class, unittest.TestCase):
+            if slow and hasattr(module, 'is_slow_test'):
+                return True
+            if not slow and not hasattr(module, 'is_slow_test'):
+                return True
+    return False
+
+
+def _discover_python_unittests(paths):
+    """Discover the python module which contains unittests under paths.
+
+    Such as:
+    ['pyspark/tests'], it will return the set of module name under the path of 
pyspark/tests, like
+    {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
+
+    Parameters
+    ----------
+    paths : list
+        Paths of modules to be discovered.
+
+    Returns
+    -------
+    A set of complete test module names discovered under the specified paths.
+
+    >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])])

Review comment:
       Yes, this doctest is to make sure `_discover_python_unittests` works in 
a real environment.
   
   For example, if we forgot to add `pyspark.pandas.tests.test_series`, the CI 
would fail with an error like:
   
   ```Python
   **********************************************************************
   File "./python/run-tests.py", line 116, in 
__main__._discover_python_unittests
   Failed example:
       sorted([x for x in _discover_python_unittests([("pyspark/pandas/tests", 
"slow")])])
       # doctest: +NORMALIZE_WHITESPACE
   Expected:
       ['pyspark.pandas.tests.indexes.test_base', 
'pyspark.pandas.tests.indexes.test_datetime',
       'pyspark.pandas.tests.test_dataframe', 
'pyspark.pandas.tests.test_groupby',
       'pyspark.pandas.tests.test_indexing', 
'pyspark.pandas.tests.test_ops_on_diff_frames',
       'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 
'pyspark.pandas.tests.test_series']
   Got:
       ['pyspark.pandas.tests.indexes.test_base', 
'pyspark.pandas.tests.indexes.test_datetime',
   'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby',
   'pyspark.pandas.tests.test_indexing', 
'pyspark.pandas.tests.test_ops_on_diff_frames',
   'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 
'pyspark.pandas.tests.test_series', 'pyspark.pandas.tests.test_series']
   **********************************************************************
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to