Skip to content

Commit 10a13be

Browse files
miss-islingtonencukougpshead
authored
[3.11] gh-149486: tarfile.data_filter: validate written link target (GH-149487) (#150904)
* gh-149486: tarfile.data_filter: validate written link target (GH-149487) The data filter rewrote linknames with normpath() but ran the containment check against the un-normalised value, and computed a symlink's directory before stripping trailing slashes. Both let a crafted archive create links pointing outside the destination. Also reject link members that resolve to the destination directory itself, which could otherwise replace it with a symlink and redirect all subsequent members. (Patch by Greg; Petr's just reviewing & merging.) (cherry picked from commit 5784119) Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Gregory P. Smith <greg@krypto.org> * Move dotdot_resolves_early setting to setUpClass --------- Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent dae4b1a commit 10a13be

3 files changed

Lines changed: 133 additions & 39 deletions

File tree

Lib/tarfile.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -817,16 +817,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
817817
if member.islnk() or member.issym():
818818
if os.path.isabs(member.linkname):
819819
raise AbsoluteLinkError(member)
820+
# A link member that resolves to the destination directory itself
821+
# would replace it with a (sym)link, redirecting the destination
822+
# for all subsequent members.
823+
if target_path == dest_path:
824+
raise OutsideDestinationError(member, target_path)
820825
normalized = os.path.normpath(member.linkname)
821826
if normalized != member.linkname:
822827
new_attrs['linkname'] = normalized
823828
if member.issym():
824-
target_path = os.path.join(dest_path,
825-
os.path.dirname(name),
826-
member.linkname)
829+
# The symlink is created at `name` with trailing separators
830+
# stripped, so its target is relative to the directory
831+
# containing that path.
832+
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
833+
target_path = os.path.join(dest_path, link_dir, normalized)
827834
else:
828-
target_path = os.path.join(dest_path,
829-
member.linkname)
835+
target_path = os.path.join(dest_path, normalized)
830836
target_path = os.path.realpath(target_path,
831837
strict=os.path.ALLOW_MISSING)
832838
if os.path.commonpath([target_path, dest_path]) != dest_path:

Lib/test/test_tarfile.py

Lines changed: 117 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3448,6 +3448,39 @@ class TestExtractionFilters(unittest.TestCase):
34483448
# The destination for the extraction, within `outerdir`
34493449
destdir = outerdir / 'dest'
34503450

3451+
@classmethod
3452+
def setUpClass(cls):
3453+
# Posix and Windows have different pathname resolution:
3454+
# either symlink or a '..' component resolve first.
3455+
# Let's see which we are on.
3456+
if os_helper.can_symlink():
3457+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3458+
os.mkdir(testpath)
3459+
3460+
# testpath/current links to `.` which is all of:
3461+
# - `testpath`
3462+
# - `testpath/current`
3463+
# - `testpath/current/current`
3464+
# - etc.
3465+
os.symlink('.', os.path.join(testpath, 'current'))
3466+
3467+
# we'll test where `testpath/current/../file` ends up
3468+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3469+
pass
3470+
3471+
if os.path.exists(os.path.join(testpath, 'file')):
3472+
# Windows collapses 'current\..' to '.' first, leaving
3473+
# 'testpath\file'
3474+
cls.dotdot_resolves_early = True
3475+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3476+
# Posix resolves 'current' to '.' first, leaving
3477+
# 'testpath/../file'
3478+
cls.dotdot_resolves_early = False
3479+
else:
3480+
raise AssertionError('Could not determine link resolution')
3481+
else:
3482+
cls.dotdot_resolves_early = False
3483+
34513484
@contextmanager
34523485
def check_context(self, tar, filter, *, check_flag=True):
34533486
"""Extracts `tar` to `self.destdir` and allows checking the result
@@ -3619,10 +3652,19 @@ def test_parent_symlink(self):
36193652
+ "which is outside the destination")
36203653

36213654
with self.check_context(arc.open(), 'data'):
3622-
self.expect_exception(
3623-
tarfile.LinkOutsideDestinationError,
3624-
"""'parent' would link to ['"].*outerdir['"], """
3625-
+ "which is outside the destination")
3655+
if self.dotdot_resolves_early:
3656+
# 'current/../..' normalises to '..', which is rejected.
3657+
self.expect_exception(
3658+
tarfile.LinkOutsideDestinationError,
3659+
"""'parent' would link to ['"].*outerdir['"], """
3660+
+ "which is outside the destination")
3661+
else:
3662+
# 'current/..' normalises to '.'; the rewritten link is
3663+
# created and 'parent/evil' lands harmlessly inside the
3664+
# destination.
3665+
self.expect_file('current', symlink_to='.')
3666+
self.expect_file('parent', symlink_to='.')
3667+
self.expect_file('evil')
36263668

36273669
else:
36283670
# No symlink support. The symlinks are ignored.
@@ -3712,35 +3754,6 @@ def test_parent_symlink2(self):
37123754
# Test interplaying symlinks
37133755
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
37143756

3715-
# Posix and Windows have different pathname resolution:
3716-
# either symlink or a '..' component resolve first.
3717-
# Let's see which we are on.
3718-
if os_helper.can_symlink():
3719-
testpath = os.path.join(TEMPDIR, 'resolution_test')
3720-
os.mkdir(testpath)
3721-
3722-
# testpath/current links to `.` which is all of:
3723-
# - `testpath`
3724-
# - `testpath/current`
3725-
# - `testpath/current/current`
3726-
# - etc.
3727-
os.symlink('.', os.path.join(testpath, 'current'))
3728-
3729-
# we'll test where `testpath/current/../file` ends up
3730-
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3731-
pass
3732-
3733-
if os.path.exists(os.path.join(testpath, 'file')):
3734-
# Windows collapses 'current\..' to '.' first, leaving
3735-
# 'testpath\file'
3736-
dotdot_resolves_early = True
3737-
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3738-
# Posix resolves 'current' to '.' first, leaving
3739-
# 'testpath/../file'
3740-
dotdot_resolves_early = False
3741-
else:
3742-
raise AssertionError('Could not determine link resolution')
3743-
37443757
with ArchiveMaker() as arc:
37453758

37463759
# `current` links to `.` which is both the destination directory
@@ -3776,7 +3789,7 @@ def test_parent_symlink2(self):
37763789

37773790
with self.check_context(arc.open(), 'data'):
37783791
if os_helper.can_symlink():
3779-
if dotdot_resolves_early:
3792+
if self.dotdot_resolves_early:
37803793
# Fail when extracting a file outside destination
37813794
self.expect_exception(
37823795
tarfile.OutsideDestinationError,
@@ -3896,6 +3909,76 @@ def test_sly_relative2(self):
38963909
+ """['"].*moo['"], which is outside the """
38973910
+ "destination")
38983911

3912+
@symlink_test
3913+
@os_helper.skip_unless_symlink
3914+
def test_normpath_realpath_mismatch(self):
3915+
# The link-target check must validate the value that will actually
3916+
# be written to disk (the normalised linkname), not the original.
3917+
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
3918+
# of 'a/../../...' stays inside the destination while normpath()
3919+
# collapses 'a/..' lexically and escapes.
3920+
depth = len(self.destdir.parts) + 5
3921+
deep = '/'.join(f'p{i}' for i in range(depth))
3922+
sneaky = 'a/' + '../' * depth + 'flag'
3923+
for kind in 'symlink_to', 'hardlink_to':
3924+
with self.subTest(kind):
3925+
with ArchiveMaker() as arc:
3926+
arc.add('a', symlink_to=deep)
3927+
arc.add('escape', **{kind: sneaky})
3928+
with self.check_context(arc.open(), 'data'):
3929+
self.expect_exception(
3930+
tarfile.LinkOutsideDestinationError)
3931+
3932+
@symlink_test
3933+
@os_helper.skip_unless_symlink
3934+
def test_symlink_trailing_slash(self):
3935+
# A trailing slash on a symlink member's name must not cause the
3936+
# link target to be resolved relative to the wrong directory.
3937+
with ArchiveMaker() as arc:
3938+
t = tarfile.TarInfo('x/')
3939+
t.type = tarfile.SYMTYPE
3940+
t.linkname = '..'
3941+
arc.tar_w.addfile(t)
3942+
arc.add('x/escaped', content='hi')
3943+
3944+
with self.check_context(arc.open(), 'data'):
3945+
self.expect_exception(tarfile.LinkOutsideDestinationError)
3946+
3947+
@symlink_test
3948+
@os_helper.skip_unless_symlink
3949+
def test_link_at_destination(self):
3950+
# A link member whose name resolves to the destination directory
3951+
# itself must be rejected: otherwise the destination is replaced
3952+
# by a symlink and later members can be redirected through it.
3953+
for name in '', '.', './':
3954+
with ArchiveMaker() as arc:
3955+
t = tarfile.TarInfo(name)
3956+
t.type = tarfile.SYMTYPE
3957+
t.linkname = '.'
3958+
arc.tar_w.addfile(t)
3959+
3960+
with self.check_context(arc.open(), 'data'):
3961+
self.expect_exception(tarfile.OutsideDestinationError)
3962+
3963+
@symlink_test
3964+
@os_helper.skip_unless_symlink
3965+
def test_empty_name_symlink_chain(self):
3966+
# Regression test for a chain of empty-named symlinks that
3967+
# incrementally redirects the destination outwards.
3968+
with ArchiveMaker() as arc:
3969+
for name, target in [('', ''), ('a/', '..'),
3970+
('', 'dummy'), ('', 'a'),
3971+
('b/', '..'),
3972+
('', 'dummy'), ('', 'a/b')]:
3973+
t = tarfile.TarInfo(name)
3974+
t.type = tarfile.SYMTYPE
3975+
t.linkname = target
3976+
arc.tar_w.addfile(t)
3977+
arc.add('escaped', content='hi')
3978+
3979+
with self.check_context(arc.open(), 'data'):
3980+
self.expect_exception(tarfile.FilterError)
3981+
38993982
@symlink_test
39003983
def test_deep_symlink(self):
39013984
# Test that symlinks and hardlinks inside a directory
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:func:`tarfile.data_filter` now validates link targets using the same
2+
normalised value that is written to disk, strips trailing separators from
3+
the member name when resolving a symlink's directory, and rejects link
4+
members that would replace the destination directory itself. This closes
5+
several path-traversal bypasses of the ``data`` extraction filter.

0 commit comments

Comments
 (0)