https://github.com/python/cpython/commit/5bb059fe606983814a445e4dcf9e96fd7cb4951a
commit: 5bb059fe606983814a445e4dcf9e96fd7cb4951a
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-25T19:59:20Z
summary:

GH-127236: `pathname2url()`: generate RFC 1738 URL for absolute POSIX path (#127194)

When handed an absolute Windows path such as `C:\foo` or `//server/share`,
the `urllib.request.pathname2url()` function returns a URL with an
authority section, such as `///C:/foo` or `//server/share` (or before
GH-126205, `////server/share`). Only the `file:` prefix is omitted.

But when handed an absolute POSIX path such as `/etc/hosts`, or a Windows
path of the same form (rooted but lacking a drive), the function returns a
URL without an authority section, such as `/etc/hosts`.

This patch corrects the discrepancy by adding a `//` prefix before
drive-less, rooted paths when generating URLs.

files:
A Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst
M Doc/library/urllib.request.rst
M Lib/nturl2path.py
M Lib/test/test_urllib.py
M Lib/urllib/request.py

diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 9055556a3703bb..3c07dc4adf434a 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions:
       'file:///C:/Program%20Files'
 
    .. versionchanged:: 3.14
-      Windows drive letters are no longer converted to uppercase.
+      Paths beginning with a slash are converted to URLs with authority
+      sections. For example, the path ``/etc/hosts`` is converted to
+      the URL ``///etc/hosts``.
 
    .. versionchanged:: 3.14
-      On Windows, ``:`` characters not following a drive letter are quoted. In
-      previous versions, :exc:`OSError` was raised if a colon character was
-      found in any position other than the second character.
+      Windows drive letters are no longer converted to uppercase, and ``:``
+      characters not following a drive letter no longer cause an
+      :exc:`OSError` exception to be raised on Windows.
 
 
 .. function:: url2pathname(url)
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 01135d1b7683b2..7e13ae3128333d 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -55,13 +55,17 @@ def pathname2url(p):
         p = p[4:]
         if p[:4].upper() == 'UNC/':
             p = '//' + p[4:]
-    drive, tail = ntpath.splitdrive(p)
-    if drive[1:] == ':':
-        # DOS drive specified. Add three slashes to the start, producing
-        # an authority section with a zero-length authority, and a path
-        # section starting with a single slash.
-        drive = f'///{drive}'
+    drive, root, tail = ntpath.splitroot(p)
+    if drive:
+        if drive[1:] == ':':
+            # DOS drive specified. Add three slashes to the start, producing
+            # an authority section with a zero-length authority, and a path
+            # section starting with a single slash.
+            drive = f'///{drive}'
+        drive = urllib.parse.quote(drive, safe='/:')
+    elif root:
+        # Add explicitly empty authority to path beginning with one slash.
+        root = f'//{root}'
 
-    drive = urllib.parse.quote(drive, safe='/:')
     tail = urllib.parse.quote(tail)
-    return drive + tail
+    return drive + root + tail
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index fe16badc5bc77d..00e46990c406ac 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1434,7 +1434,7 @@ def test_pathname2url_win(self):
         self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
         self.assertEqual(fn('foo:bar'), 'foo%3Abar')
         # No drive letter
-        self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
+        self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
         self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
         self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
         self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
@@ -1447,7 +1447,7 @@ def test_pathname2url_win(self):
         self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir')
         # Round-tripping
         urls = ['///C:',
-                '/folder/test/',
+                '///folder/test/',
                 '///C:/foo/bar/spam.foo']
         for url in urls:
             self.assertEqual(fn(urllib.request.url2pathname(url)), url)
@@ -1456,12 +1456,12 @@ def test_pathname2url_win(self):
                      'test specific to POSIX pathnames')
     def test_pathname2url_posix(self):
         fn = urllib.request.pathname2url
-        self.assertEqual(fn('/'), '/')
-        self.assertEqual(fn('/a/b.c'), '/a/b.c')
+        self.assertEqual(fn('/'), '///')
+        self.assertEqual(fn('/a/b.c'), '///a/b.c')
         self.assertEqual(fn('//a/b.c'), '////a/b.c')
         self.assertEqual(fn('///a/b.c'), '/////a/b.c')
         self.assertEqual(fn('////a/b.c'), '//////a/b.c')
-        self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
+        self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
 
     @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
     def test_pathname2url_nonascii(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 9e555432688a5b..1fcaa89188188d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1667,9 +1667,11 @@ def url2pathname(pathname):
     def pathname2url(pathname):
         """OS-specific conversion from a file system path to a relative URL
         of the 'file' scheme; not recommended for general use."""
-        if pathname[:2] == '//':
-            # Add explicitly empty authority to avoid interpreting the path
-            # as authority.
+        if pathname[:1] == '/':
+            # Add explicitly empty authority to absolute path. If the path
+            # starts with exactly one slash then this change is mostly
+            # cosmetic, but if it begins with two or more slashes then this
+            # avoids interpreting the path as a URL authority.
             pathname = '//' + pathname
         encoding = sys.getfilesystemencoding()
         errors = sys.getfilesystemencodeerrors()
diff --git a/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst
new file mode 100644
index 00000000000000..0b8ffdb3901db3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst
@@ -0,0 +1,5 @@
+:func:`urllib.request.pathname2url` now adds an empty authority when
+generating a URL for a path that begins with exactly one slash. For example,
+the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``.
+As a result of this change, URLs without authorities are only generated for
+relative paths.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to