https://github.com/python/cpython/commit/474e419792484d1c16e7d9c99b7bf144136b9307
commit: 474e419792484d1c16e7d9c99b7bf144136b9307
branch: main
author: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్  రెడ్డి) 
<[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-01-08T10:32:07Z
summary:

gh-41872: Fix quick extraction of module docstrings from a file in pydoc 
(GH-127520)

It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.

Co-authored-by: Éric <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
M Lib/pydoc.py
M Lib/test/test_pydoc/test_pydoc.py

diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index c863794ea14ef9..9e84292aaf825f 100644
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -53,6 +53,7 @@ class or function within a module or module in a package.  If 
the
 #     the current directory is changed with os.chdir(), an incorrect
 #     path will be displayed.
 
+import ast
 import __future__
 import builtins
 import importlib._bootstrap
@@ -384,21 +385,29 @@ def ispackage(path):
     return False
 
 def source_synopsis(file):
-    line = file.readline()
-    while line[:1] == '#' or not line.strip():
-        line = file.readline()
-        if not line: break
-    line = line.strip()
-    if line[:4] == 'r"""': line = line[1:]
-    if line[:3] == '"""':
-        line = line[3:]
-        if line[-1:] == '\\': line = line[:-1]
-        while not line.strip():
-            line = file.readline()
-            if not line: break
-        result = line.split('"""')[0].strip()
-    else: result = None
-    return result
+    """Return the one-line summary of a file object, if present"""
+
+    string = ''
+    try:
+        tokens = tokenize.generate_tokens(file.readline)
+        for tok_type, tok_string, _, _, _ in tokens:
+            if tok_type == tokenize.STRING:
+                string += tok_string
+            elif tok_type == tokenize.NEWLINE:
+                with warnings.catch_warnings():
+                    # Ignore the "invalid escape sequence" warning.
+                    warnings.simplefilter("ignore", SyntaxWarning)
+                    docstring = ast.literal_eval(string)
+                if not isinstance(docstring, str):
+                    return None
+                return docstring.strip().split('\n')[0].strip()
+            elif tok_type == tokenize.OP and tok_string in ('(', ')'):
+                string += tok_string
+            elif tok_type not in (tokenize.COMMENT, tokenize.NL, 
tokenize.ENCODING):
+                return None
+    except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
+        return None
+    return None
 
 def synopsis(filename, cache={}):
     """Get the one-line summary out of a module file."""
diff --git a/Lib/test/test_pydoc/test_pydoc.py 
b/Lib/test/test_pydoc/test_pydoc.py
index c798b11f5aa56e..cec18aa9440c9e 100644
--- a/Lib/test/test_pydoc/test_pydoc.py
+++ b/Lib/test/test_pydoc/test_pydoc.py
@@ -4,6 +4,7 @@
 import contextlib
 import importlib.util
 import inspect
+import io
 import pydoc
 import py_compile
 import keyword
@@ -899,6 +900,82 @@ def test_synopsis(self):
             synopsis = pydoc.synopsis(TESTFN, {})
             self.assertEqual(synopsis, 'line 1: h\xe9')
 
+    def test_source_synopsis(self):
+        def check(source, expected, encoding=None):
+            if isinstance(source, str):
+                source_file = StringIO(source)
+            else:
+                source_file = io.TextIOWrapper(io.BytesIO(source), 
encoding=encoding)
+            with source_file:
+                result = pydoc.source_synopsis(source_file)
+                self.assertEqual(result, expected)
+
+        check('"""Single line docstring."""',
+              'Single line docstring.')
+        check('"""First line of docstring.\nSecond line.\nThird line."""',
+              'First line of docstring.')
+        check('"""First line of docstring.\\nSecond line.\\nThird line."""',
+              'First line of docstring.')
+        check('"""  Whitespace around docstring.  """',
+              'Whitespace around docstring.')
+        check('import sys\n"""No docstring"""',
+              None)
+        check('  \n"""Docstring after empty line."""',
+              'Docstring after empty line.')
+        check('# Comment\n"""Docstring after comment."""',
+              'Docstring after comment.')
+        check('  # Indented comment\n"""Docstring after comment."""',
+              'Docstring after comment.')
+        check('""""""', # Empty docstring
+              '')
+        check('', # Empty file
+              None)
+        check('"""Embedded\0null byte"""',
+              None)
+        check('"""Embedded null byte"""\0',
+              None)
+        check('"""Café and résumé."""',
+              'Café and résumé.')
+        check("'''Triple single quotes'''",
+              'Triple single quotes')
+        check('"Single double quotes"',
+              'Single double quotes')
+        check("'Single single quotes'",
+              'Single single quotes')
+        check('"""split\\\nline"""',
+              'splitline')
+        check('"""Unrecognized escape \\sequence"""',
+              'Unrecognized escape \\sequence')
+        check('"""Invalid escape seq\\uence"""',
+              None)
+        check('r"""Raw \\stri\\ng"""',
+              'Raw \\stri\\ng')
+        check('b"""Bytes literal"""',
+              None)
+        check('f"""f-string"""',
+              None)
+        check('"""Concatenated""" \\\n"string" \'literals\'',
+              'Concatenatedstringliterals')
+        check('"""String""" + """expression"""',
+              None)
+        check('("""In parentheses""")',
+              'In parentheses')
+        check('("""Multiple lines """\n"""in parentheses""")',
+              'Multiple lines in parentheses')
+        check('()', # tuple
+              None)
+        check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
+              '€uro sign', encoding='iso-8859-15')
+        check(b'"""\xa4"""', # Decoding error
+              None, encoding='utf-8')
+
+        with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as 
temp_file:
+            temp_file.write('"""Real file test."""\n')
+            temp_file.flush()
+            temp_file.seek(0)
+            result = pydoc.source_synopsis(temp_file)
+            self.assertEqual(result, "Real file test.")
+
     @requires_docstrings
     def test_synopsis_sourceless(self):
         os = import_helper.import_fresh_module('os')
diff --git 
a/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst 
b/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
new file mode 100644
index 00000000000000..b807dcb284c248
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
@@ -0,0 +1,3 @@
+Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
+It now supports docstrings with single quotes, escape sequences,
+raw string literals, and other Python syntax.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to