New submission from Lars Gustäbel:

tarfile does not check pathnames or linknames on extraction. This can
lead to data loss or attack scenarios when members with absolute
pathnames or pathnames outside of the archive's scope overwrite or
overlay existing files or directories.

Example for a symlink attack against /etc/passwd:

foo -> /etc
foo/passwd

----------
assignee: lars.gustaebel
components: Library (Lib)
files: insecure_pathnames.diff
keywords: patch
messages: 55361
nosy: lars.gustaebel, matejcik
priority: normal
severity: normal
status: open
title: tarfile insecure pathname extraction
type: security
versions: Python 2.6

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1044>
__________________________________
Index: Doc/library/tarfile.rst
===================================================================
--- Doc/library/tarfile.rst     (revision 57571)
+++ Doc/library/tarfile.rst     (working copy)
@@ -168,6 +168,12 @@
    :attr:`TarFile.errorlevel`\ ``== 2``.
 
 
+.. exception:: SecurityError
+
+   Is a subclass of :exc:`ExtractError` and is raised when insecure pathnames
+   are found on extraction.
+
+
 .. exception:: HeaderError
 
    Is raised by :meth:`frombuf` if the buffer it gets is invalid.
@@ -327,16 +333,22 @@
    available.
 
 
-.. method:: TarFile.extractall([path[, members]])
+.. method:: TarFile.extractall([path[, members[, check_paths]]])
 
    Extract all members from the archive to the current working directory or
-   directory *path*. If optional *members* is given, it must be a subset of the
-   list returned by :meth:`getmembers`. Directory information like owner,
-   modification time and permissions are set after all members have been 
extracted.
-   This is done to work around two problems: A directory's modification time is
-   reset each time a file is created in it. And, if a directory's permissions 
do
-   not allow writing, extracting files to it will fail.
+   directory *path*. If optional *members* is given, it must be an iterator of
+   :class:`TarInfo` objects (e.g. a subset of the list returned by
+   :meth:`getmembers`). If *check_paths* is :const:`True` (default), insecure
+   pathnames that are absolute or point to a destination outside of the
+   archive's scope are rejected. Depending on :attr:`TarFile.errorlevel` a
+   :exc:`SecurityError` is raised.
 
+   Directory information like owner, modification time and permissions are set
+   after all members have been extracted. This is done to work around two
+   problems: A directory's modification time is reset each time a file is
+   created in it. And, if a directory's permissions do not allow writing,
+   extracting files to it will fail.
+
    .. versionadded:: 2.5
 
 
@@ -349,9 +361,8 @@
 
    .. note::
 
-      Because the :meth:`extract` method allows random access to a tar archive 
there
-      are some issues you must take care of yourself. See the description for
-      :meth:`extractall` above.
+      The :meth:`extract` method does not take care of several extraction 
issues.
+      In most cases you should consider using the :meth:`extractall` method.
 
 
 .. method:: TarFile.extractfile(member)
Index: Lib/tarfile.py
===================================================================
--- Lib/tarfile.py      (revision 57571)
+++ Lib/tarfile.py      (working copy)
@@ -340,6 +340,9 @@
 class ExtractError(TarError):
     """General exception for extract errors."""
     pass
+class SecurityError(ExtractError):
+    """Exception for insecure pathnames."""
+    pass
 class ReadError(TarError):
     """Exception for unreadble tar archives."""
     pass
@@ -2006,12 +2009,13 @@
 
         self.members.append(tarinfo)
 
-    def extractall(self, path=".", members=None):
+    def extractall(self, path=".", members=None, check_paths=True):
         """Extract all members from the archive to the current working
            directory and set owner, modification time and permissions on
            directories afterwards. `path' specifies a different directory
            to extract to. `members' is optional and must be a subset of the
-           list returned by getmembers().
+           list returned by getmembers(). If `check_paths' is True insecure
+           pathnames are not extracted.
         """
         directories = []
 
@@ -2019,6 +2023,20 @@
             members = self
 
         for tarinfo in members:
+            if check_paths:
+                try:
+                    self._check_path(tarinfo.name)
+                    if tarinfo.islnk():
+                        self._check_path(tarinfo.linkname)
+                    if tarinfo.issym():
+                        self._check_path(os.path.join(tarinfo.name, 
tarinfo.linkname))
+                except SecurityError, e:
+                    if self.errorlevel > 1:
+                        raise
+                    else:
+                        self._dbg(1, "tarfile: %s" % e)
+                        continue
+
             if tarinfo.isdir():
                 # Extract directory with a safe mode, so that
                 # all files below can be extracted as well.
@@ -2329,6 +2347,15 @@
     #--------------------------------------------------------------------------
     # Little helper methods:
 
+    def _check_path(self, path):
+        """Raise an SecurityError if `path' is an insecure pathname.
+        """
+        path = normpath(path)
+        if path.startswith("/"):
+            raise SecurityError("found insecure absolute path %r" % path)
+        if path.startswith("../"):
+            raise SecurityError("found insecure relative path %r" % path)
+
     def _getmember(self, name, tarinfo=None):
         """Find an archive member by name from bottom to top.
            If tarinfo is given, it is used as the starting point.
Index: Lib/test/test_tarfile.py
===================================================================
--- Lib/test/test_tarfile.py    (revision 57571)
+++ Lib/test/test_tarfile.py    (working copy)
@@ -1029,6 +1029,57 @@
         tarinfo.tobuf(tarfile.PAX_FORMAT)
 
 
+class SecurityTest(unittest.TestCase):
+
+    class MockTarFile(tarfile.TarFile):
+        def _extract_member(self, tarinfo, path):
+            pass
+
+    def setUp(self):
+        fobj = StringIO.StringIO()
+        self.tar = tarfile.open(fileobj=fobj, mode="w")
+        self.tar.addfile(tarfile.TarInfo("/foo"))
+        self.tar.addfile(tarfile.TarInfo("../../../foo"))
+        self.tar.addfile(tarfile.TarInfo("bar/../../foo"))
+
+        names = ["bar1", "bar2", "bar3", "baz1", "baz2", "baz3"]
+        for tp in (tarfile.LNKTYPE, tarfile.SYMTYPE):
+            for target in ("/foo", "../../foo", "../foo"):
+                t = tarfile.TarInfo(names.pop(0))
+                t.type = tp
+                t.linkname = target
+                self.tar.addfile(t)
+        self.tar.close()
+
+        fobj.seek(0)
+        self.tar = self.MockTarFile(fileobj=fobj)
+        self.tar.errorlevel = 2
+
+    def tearDown(self):
+        self.tar.close()
+
+    def _extract(self, name):
+        tarinfo = self.tar.getmember(name)
+        self.tar.extractall(members=[tarinfo])
+
+    def test_absolute_pathnames(self):
+        self.assertRaises(tarfile.ExtractError, self._extract, "/foo")
+
+    def test_directory_traversal(self):
+        self.assertRaises(tarfile.ExtractError, self._extract, "../../../foo")
+        self.assertRaises(tarfile.ExtractError, self._extract, "bar/../../foo")
+
+    def test_absolute_link(self):
+        self.assertRaises(tarfile.ExtractError, self._extract, "bar1")
+        self.assertRaises(tarfile.ExtractError, self._extract, "bar2")
+        self.assertRaises(tarfile.ExtractError, self._extract, "bar3")
+
+    def test_absolute_symlink(self):
+        self.assertRaises(tarfile.ExtractError, self._extract, "baz1")
+        self.assertRaises(tarfile.ExtractError, self._extract, "baz2")
+        self._extract("baz3")
+
+
 class GzipMiscReadTest(MiscReadTest):
     tarname = gzipname
     mode = "r:gz"
@@ -1079,6 +1130,7 @@
         PaxUnicodeTest,
         AppendTest,
         LimitsTest,
+        SecurityTest,
     ]
 
     if hasattr(os, "link"):
_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to