New submission from Lars Gustäbel:
tarfile does not check pathnames or linknames on extraction. This can
lead to data loss or attack scenarios when members with absolute
pathnames or pathnames outside of the archive's scope overwrite or
overlay existing files or directories.
Example for a symlink attack against /etc/passwd:
foo -> /etc
foo/passwd
----------
assignee: lars.gustaebel
components: Library (Lib)
files: insecure_pathnames.diff
keywords: patch
messages: 55361
nosy: lars.gustaebel, matejcik
priority: normal
severity: normal
status: open
title: tarfile insecure pathname extraction
type: security
versions: Python 2.6
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1044>
__________________________________
Index: Doc/library/tarfile.rst
===================================================================
--- Doc/library/tarfile.rst (revision 57571)
+++ Doc/library/tarfile.rst (working copy)
@@ -168,6 +168,12 @@
:attr:`TarFile.errorlevel`\ ``== 2``.
+.. exception:: SecurityError
+
+ Is a subclass of :exc:`ExtractError` and is raised when insecure pathnames
+ are found on extraction.
+
+
.. exception:: HeaderError
Is raised by :meth:`frombuf` if the buffer it gets is invalid.
@@ -327,16 +333,22 @@
available.
-.. method:: TarFile.extractall([path[, members]])
+.. method:: TarFile.extractall([path[, members[, check_paths]]])
Extract all members from the archive to the current working directory or
- directory *path*. If optional *members* is given, it must be a subset of the
- list returned by :meth:`getmembers`. Directory information like owner,
- modification time and permissions are set after all members have been
extracted.
- This is done to work around two problems: A directory's modification time is
- reset each time a file is created in it. And, if a directory's permissions
do
- not allow writing, extracting files to it will fail.
+ directory *path*. If optional *members* is given, it must be an iterator of
+ :class:`TarInfo` objects (e.g. a subset of the list returned by
+ :meth:`getmembers`). If *check_paths* is :const:`True` (default), insecure
+ pathnames that are absolute or point to a destination outside of the
+ archive's scope are rejected. Depending on :attr:`TarFile.errorlevel` a
+ :exc:`SecurityError` is raised.
+ Directory information like owner, modification time and permissions are set
+ after all members have been extracted. This is done to work around two
+ problems: A directory's modification time is reset each time a file is
+ created in it. And, if a directory's permissions do not allow writing,
+ extracting files to it will fail.
+
.. versionadded:: 2.5
@@ -349,9 +361,8 @@
.. note::
- Because the :meth:`extract` method allows random access to a tar archive
there
- are some issues you must take care of yourself. See the description for
- :meth:`extractall` above.
+ The :meth:`extract` method does not take care of several extraction
issues.
+ In most cases you should consider using the :meth:`extractall` method.
.. method:: TarFile.extractfile(member)
Index: Lib/tarfile.py
===================================================================
--- Lib/tarfile.py (revision 57571)
+++ Lib/tarfile.py (working copy)
@@ -340,6 +340,9 @@
class ExtractError(TarError):
"""General exception for extract errors."""
pass
+class SecurityError(ExtractError):
+ """Exception for insecure pathnames."""
+ pass
class ReadError(TarError):
"""Exception for unreadble tar archives."""
pass
@@ -2006,12 +2009,13 @@
self.members.append(tarinfo)
- def extractall(self, path=".", members=None):
+ def extractall(self, path=".", members=None, check_paths=True):
"""Extract all members from the archive to the current working
directory and set owner, modification time and permissions on
directories afterwards. `path' specifies a different directory
to extract to. `members' is optional and must be a subset of the
- list returned by getmembers().
+ list returned by getmembers(). If `check_paths' is True insecure
+ pathnames are not extracted.
"""
directories = []
@@ -2019,6 +2023,20 @@
members = self
for tarinfo in members:
+ if check_paths:
+ try:
+ self._check_path(tarinfo.name)
+ if tarinfo.islnk():
+ self._check_path(tarinfo.linkname)
+ if tarinfo.issym():
+ self._check_path(os.path.join(tarinfo.name,
tarinfo.linkname))
+ except SecurityError, e:
+ if self.errorlevel > 1:
+ raise
+ else:
+ self._dbg(1, "tarfile: %s" % e)
+ continue
+
if tarinfo.isdir():
# Extract directory with a safe mode, so that
# all files below can be extracted as well.
@@ -2329,6 +2347,15 @@
#--------------------------------------------------------------------------
# Little helper methods:
+ def _check_path(self, path):
+ """Raise an SecurityError if `path' is an insecure pathname.
+ """
+ path = normpath(path)
+ if path.startswith("/"):
+ raise SecurityError("found insecure absolute path %r" % path)
+ if path.startswith("../"):
+ raise SecurityError("found insecure relative path %r" % path)
+
def _getmember(self, name, tarinfo=None):
"""Find an archive member by name from bottom to top.
If tarinfo is given, it is used as the starting point.
Index: Lib/test/test_tarfile.py
===================================================================
--- Lib/test/test_tarfile.py (revision 57571)
+++ Lib/test/test_tarfile.py (working copy)
@@ -1029,6 +1029,57 @@
tarinfo.tobuf(tarfile.PAX_FORMAT)
+class SecurityTest(unittest.TestCase):
+
+ class MockTarFile(tarfile.TarFile):
+ def _extract_member(self, tarinfo, path):
+ pass
+
+ def setUp(self):
+ fobj = StringIO.StringIO()
+ self.tar = tarfile.open(fileobj=fobj, mode="w")
+ self.tar.addfile(tarfile.TarInfo("/foo"))
+ self.tar.addfile(tarfile.TarInfo("../../../foo"))
+ self.tar.addfile(tarfile.TarInfo("bar/../../foo"))
+
+ names = ["bar1", "bar2", "bar3", "baz1", "baz2", "baz3"]
+ for tp in (tarfile.LNKTYPE, tarfile.SYMTYPE):
+ for target in ("/foo", "../../foo", "../foo"):
+ t = tarfile.TarInfo(names.pop(0))
+ t.type = tp
+ t.linkname = target
+ self.tar.addfile(t)
+ self.tar.close()
+
+ fobj.seek(0)
+ self.tar = self.MockTarFile(fileobj=fobj)
+ self.tar.errorlevel = 2
+
+ def tearDown(self):
+ self.tar.close()
+
+ def _extract(self, name):
+ tarinfo = self.tar.getmember(name)
+ self.tar.extractall(members=[tarinfo])
+
+ def test_absolute_pathnames(self):
+ self.assertRaises(tarfile.ExtractError, self._extract, "/foo")
+
+ def test_directory_traversal(self):
+ self.assertRaises(tarfile.ExtractError, self._extract, "../../../foo")
+ self.assertRaises(tarfile.ExtractError, self._extract, "bar/../../foo")
+
+ def test_absolute_link(self):
+ self.assertRaises(tarfile.ExtractError, self._extract, "bar1")
+ self.assertRaises(tarfile.ExtractError, self._extract, "bar2")
+ self.assertRaises(tarfile.ExtractError, self._extract, "bar3")
+
+ def test_absolute_symlink(self):
+ self.assertRaises(tarfile.ExtractError, self._extract, "baz1")
+ self.assertRaises(tarfile.ExtractError, self._extract, "baz2")
+ self._extract("baz3")
+
+
class GzipMiscReadTest(MiscReadTest):
tarname = gzipname
mode = "r:gz"
@@ -1079,6 +1130,7 @@
PaxUnicodeTest,
AppendTest,
LimitsTest,
+ SecurityTest,
]
if hasattr(os, "link"):
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com