Skip to content

Commit 0d28f5e

Browse files
miss-islingtonencukougpshead
authored
[3.12] gh-149486: tarfile.data_filter: validate written link target (GH-149487) (#149556)
* gh-149486: tarfile.data_filter: validate written link target (GH-149487) The data filter rewrote linknames with normpath() but ran the containment check against the un-normalised value, and computed a symlink's directory before stripping trailing slashes. Both let a crafted archive create links pointing outside the destination. Also reject link members that resolve to the destination directory itself, which could otherwise replace it with a symlink and redirect all subsequent members. (Patch by Greg; Petr's just reviewing & merging.) (cherry picked from commit 5784119) Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Gregory P. Smith <greg@krypto.org> * Move dotdot_resolves_early setting to setUpClass --------- Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 6cad028 commit 0d28f5e

3 files changed

Lines changed: 133 additions & 39 deletions

File tree

Lib/tarfile.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -816,16 +816,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
816816
if member.islnk() or member.issym():
817817
if os.path.isabs(member.linkname):
818818
raise AbsoluteLinkError(member)
819+
# A link member that resolves to the destination directory itself
820+
# would replace it with a (sym)link, redirecting the destination
821+
# for all subsequent members.
822+
if target_path == dest_path:
823+
raise OutsideDestinationError(member, target_path)
819824
normalized = os.path.normpath(member.linkname)
820825
if normalized != member.linkname:
821826
new_attrs['linkname'] = normalized
822827
if member.issym():
823-
target_path = os.path.join(dest_path,
824-
os.path.dirname(name),
825-
member.linkname)
828+
# The symlink is created at `name` with trailing separators
829+
# stripped, so its target is relative to the directory
830+
# containing that path.
831+
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
832+
target_path = os.path.join(dest_path, link_dir, normalized)
826833
else:
827-
target_path = os.path.join(dest_path,
828-
member.linkname)
834+
target_path = os.path.join(dest_path, normalized)
829835
target_path = os.path.realpath(target_path,
830836
strict=os.path.ALLOW_MISSING)
831837
if os.path.commonpath([target_path, dest_path]) != dest_path:

Lib/test/test_tarfile.py

Lines changed: 117 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3549,6 +3549,39 @@ class TestExtractionFilters(unittest.TestCase):
35493549
# The destination for the extraction, within `outerdir`
35503550
destdir = outerdir / 'dest'
35513551

3552+
@classmethod
3553+
def setUpClass(cls):
3554+
# Posix and Windows have different pathname resolution:
3555+
# either symlink or a '..' component resolve first.
3556+
# Let's see which we are on.
3557+
if os_helper.can_symlink():
3558+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3559+
os.mkdir(testpath)
3560+
3561+
# testpath/current links to `.` which is all of:
3562+
# - `testpath`
3563+
# - `testpath/current`
3564+
# - `testpath/current/current`
3565+
# - etc.
3566+
os.symlink('.', os.path.join(testpath, 'current'))
3567+
3568+
# we'll test where `testpath/current/../file` ends up
3569+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3570+
pass
3571+
3572+
if os.path.exists(os.path.join(testpath, 'file')):
3573+
# Windows collapses 'current\..' to '.' first, leaving
3574+
# 'testpath\file'
3575+
cls.dotdot_resolves_early = True
3576+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3577+
# Posix resolves 'current' to '.' first, leaving
3578+
# 'testpath/../file'
3579+
cls.dotdot_resolves_early = False
3580+
else:
3581+
raise AssertionError('Could not determine link resolution')
3582+
else:
3583+
cls.dotdot_resolves_early = False
3584+
35523585
@contextmanager
35533586
def check_context(self, tar, filter, *, check_flag=True):
35543587
"""Extracts `tar` to `self.destdir` and allows checking the result
@@ -3720,10 +3753,19 @@ def test_parent_symlink(self):
37203753
+ "which is outside the destination")
37213754

37223755
with self.check_context(arc.open(), 'data'):
3723-
self.expect_exception(
3724-
tarfile.LinkOutsideDestinationError,
3725-
"""'parent' would link to ['"].*outerdir['"], """
3726-
+ "which is outside the destination")
3756+
if self.dotdot_resolves_early:
3757+
# 'current/../..' normalises to '..', which is rejected.
3758+
self.expect_exception(
3759+
tarfile.LinkOutsideDestinationError,
3760+
"""'parent' would link to ['"].*outerdir['"], """
3761+
+ "which is outside the destination")
3762+
else:
3763+
# 'current/..' normalises to '.'; the rewritten link is
3764+
# created and 'parent/evil' lands harmlessly inside the
3765+
# destination.
3766+
self.expect_file('current', symlink_to='.')
3767+
self.expect_file('parent', symlink_to='.')
3768+
self.expect_file('evil')
37273769

37283770
else:
37293771
# No symlink support. The symlinks are ignored.
@@ -3813,35 +3855,6 @@ def test_parent_symlink2(self):
38133855
# Test interplaying symlinks
38143856
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
38153857

3816-
# Posix and Windows have different pathname resolution:
3817-
# either symlink or a '..' component resolve first.
3818-
# Let's see which we are on.
3819-
if os_helper.can_symlink():
3820-
testpath = os.path.join(TEMPDIR, 'resolution_test')
3821-
os.mkdir(testpath)
3822-
3823-
# testpath/current links to `.` which is all of:
3824-
# - `testpath`
3825-
# - `testpath/current`
3826-
# - `testpath/current/current`
3827-
# - etc.
3828-
os.symlink('.', os.path.join(testpath, 'current'))
3829-
3830-
# we'll test where `testpath/current/../file` ends up
3831-
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3832-
pass
3833-
3834-
if os.path.exists(os.path.join(testpath, 'file')):
3835-
# Windows collapses 'current\..' to '.' first, leaving
3836-
# 'testpath\file'
3837-
dotdot_resolves_early = True
3838-
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3839-
# Posix resolves 'current' to '.' first, leaving
3840-
# 'testpath/../file'
3841-
dotdot_resolves_early = False
3842-
else:
3843-
raise AssertionError('Could not determine link resolution')
3844-
38453858
with ArchiveMaker() as arc:
38463859

38473860
# `current` links to `.` which is both the destination directory
@@ -3877,7 +3890,7 @@ def test_parent_symlink2(self):
38773890

38783891
with self.check_context(arc.open(), 'data'):
38793892
if os_helper.can_symlink():
3880-
if dotdot_resolves_early:
3893+
if self.dotdot_resolves_early:
38813894
# Fail when extracting a file outside destination
38823895
self.expect_exception(
38833896
tarfile.OutsideDestinationError,
@@ -3997,6 +4010,76 @@ def test_sly_relative2(self):
39974010
+ """['"].*moo['"], which is outside the """
39984011
+ "destination")
39994012

4013+
@symlink_test
4014+
@os_helper.skip_unless_symlink
4015+
def test_normpath_realpath_mismatch(self):
4016+
# The link-target check must validate the value that will actually
4017+
# be written to disk (the normalised linkname), not the original.
4018+
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
4019+
# of 'a/../../...' stays inside the destination while normpath()
4020+
# collapses 'a/..' lexically and escapes.
4021+
depth = len(self.destdir.parts) + 5
4022+
deep = '/'.join(f'p{i}' for i in range(depth))
4023+
sneaky = 'a/' + '../' * depth + 'flag'
4024+
for kind in 'symlink_to', 'hardlink_to':
4025+
with self.subTest(kind):
4026+
with ArchiveMaker() as arc:
4027+
arc.add('a', symlink_to=deep)
4028+
arc.add('escape', **{kind: sneaky})
4029+
with self.check_context(arc.open(), 'data'):
4030+
self.expect_exception(
4031+
tarfile.LinkOutsideDestinationError)
4032+
4033+
@symlink_test
4034+
@os_helper.skip_unless_symlink
4035+
def test_symlink_trailing_slash(self):
4036+
# A trailing slash on a symlink member's name must not cause the
4037+
# link target to be resolved relative to the wrong directory.
4038+
with ArchiveMaker() as arc:
4039+
t = tarfile.TarInfo('x/')
4040+
t.type = tarfile.SYMTYPE
4041+
t.linkname = '..'
4042+
arc.tar_w.addfile(t)
4043+
arc.add('x/escaped', content='hi')
4044+
4045+
with self.check_context(arc.open(), 'data'):
4046+
self.expect_exception(tarfile.LinkOutsideDestinationError)
4047+
4048+
@symlink_test
4049+
@os_helper.skip_unless_symlink
4050+
def test_link_at_destination(self):
4051+
# A link member whose name resolves to the destination directory
4052+
# itself must be rejected: otherwise the destination is replaced
4053+
# by a symlink and later members can be redirected through it.
4054+
for name in '', '.', './':
4055+
with ArchiveMaker() as arc:
4056+
t = tarfile.TarInfo(name)
4057+
t.type = tarfile.SYMTYPE
4058+
t.linkname = '.'
4059+
arc.tar_w.addfile(t)
4060+
4061+
with self.check_context(arc.open(), 'data'):
4062+
self.expect_exception(tarfile.OutsideDestinationError)
4063+
4064+
@symlink_test
4065+
@os_helper.skip_unless_symlink
4066+
def test_empty_name_symlink_chain(self):
4067+
# Regression test for a chain of empty-named symlinks that
4068+
# incrementally redirects the destination outwards.
4069+
with ArchiveMaker() as arc:
4070+
for name, target in [('', ''), ('a/', '..'),
4071+
('', 'dummy'), ('', 'a'),
4072+
('b/', '..'),
4073+
('', 'dummy'), ('', 'a/b')]:
4074+
t = tarfile.TarInfo(name)
4075+
t.type = tarfile.SYMTYPE
4076+
t.linkname = target
4077+
arc.tar_w.addfile(t)
4078+
arc.add('escaped', content='hi')
4079+
4080+
with self.check_context(arc.open(), 'data'):
4081+
self.expect_exception(tarfile.FilterError)
4082+
40004083
@symlink_test
40014084
def test_deep_symlink(self):
40024085
# Test that symlinks and hardlinks inside a directory
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:func:`tarfile.data_filter` now validates link targets using the same
2+
normalised value that is written to disk, strips trailing separators from
3+
the member name when resolving a symlink's directory, and rejects link
4+
members that would replace the destination directory itself. This closes
5+
several path-traversal bypasses of the ``data`` extraction filter.

0 commit comments

Comments
 (0)