Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions NEWS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,37 @@

.. towncrier release notes start

20.3.4+security.1 (2026-06-02)
==============================

Security Fixes
--------------

- **CVE-2021-3572**: Stop splitting ``git show-ref`` output on unicode
separators when resolving a git revision, preventing a crafted ref name from
spoofing the resolved revision. (`GHSA-5xp3-jfq3-5q8x <https://github.com/advisories/GHSA-5xp3-jfq3-5q8x>`_)
- **CVE-2023-5752**: Pass a Mercurial revision as a single ``-r=<rev>`` token so
a malicious revision cannot inject ``hg`` command-line options (e.g.
``--config``). (`GHSA-mq26-g339-26xf <https://github.com/advisories/GHSA-mq26-g339-26xf>`_)
- **CVE-2025-8869**: Validate that tar symlink members point to other members
inside the same archive before extraction, closing a symlink-escape on the
no-``data_filter`` fallback path. (`GHSA-4xh5-x5gv-qwph <https://github.com/advisories/GHSA-4xh5-x5gv-qwph>`_)
- **CVE-2026-3219**: Disambiguate archive type detection in ``unpack_file`` so a
tar+zip polyglot is no longer always treated as a zip; ambiguous archives are
rejected. (`GHSA-58qw-9mgm-455v <https://github.com/advisories/GHSA-58qw-9mgm-455v>`_)
- **CVE-2026-1703**: Enforce a path-component boundary in ``is_within_directory``
(instead of a character-prefix comparison) so an extracted file cannot land in
a sibling directory whose name is a prefix of the install path. (`GHSA-6vgw-5pg2-w6jp <https://github.com/advisories/GHSA-6vgw-5pg2-w6jp>`_)

.. note

CVE-2026-6357 / GHSA-jp4c-xjxw-mgf9 was assessed and found **not applicable**
to this release: pip 20.3.4 imports the self-version-check and all of its
dependencies eagerly at startup, and the post-install check touches only
vendored (``pip._vendor.*``) modules, so a newly-installed wheel has no
well-known module name to shadow.


20.3.4 (2021-01-23)
===================

Expand Down
2 changes: 1 addition & 1 deletion src/pip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import List, Optional


__version__ = "20.3.4"
__version__ = "20.3.4+security.1"


def main(args=None):
Expand Down
110 changes: 86 additions & 24 deletions src/pip/_internal/utils/unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,33 @@ def is_within_directory(directory, target):
abs_directory = os.path.abspath(directory)
abs_target = os.path.abspath(target)

prefix = os.path.commonprefix([abs_directory, abs_target])
return prefix == abs_directory
# NOTE: os.path.commonprefix compares character-by-character, so it treats
# "/a/parent" and "/a/parentfoo" as sharing the prefix "/a/parent"
# and would report the latter as "within" the former (CVE-2026-1703).
# os.path.commonpath compares on path components but is unavailable on
# Python 2.7, so enforce a component boundary explicitly instead.
return abs_target == abs_directory or abs_target.startswith(
abs_directory + os.sep
)


def is_symlink_target_in_tar(tar, tarinfo):
# type: (tarfile.TarFile, tarfile.TarInfo) -> bool
"""
Return true if the file the symlink member points to is itself a member
of the same tar archive, i.e. the symlink does not escape the archive to
reference an arbitrary path on the host filesystem (CVE-2025-8869).
"""
linkname = os.path.join(os.path.dirname(tarinfo.name), tarinfo.linkname)

linkname = os.path.normpath(linkname)
linkname = linkname.replace("\\", "/")

try:
tar.getmember(linkname)
return True
except KeyError:
return False


def set_extracted_file_to_default_mode_plus_executable(path):
Expand Down Expand Up @@ -206,6 +231,14 @@ def untar_file(filename, location):
if member.isdir():
ensure_dir(path)
elif member.issym():
if not is_symlink_target_in_tar(tar, member):
message = (
'The tar file ({}) has a link ({}) trying to point '
'outside target directory ({})'
)
raise InstallationError(
message.format(filename, member.name, member.linkname)
)
try:
# https://github.com/python/typeshed/issues/2673
tar._extract_member(member, path) # type: ignore
Expand Down Expand Up @@ -250,32 +283,61 @@ def unpack_file(
):
# type: (...) -> None
filename = os.path.realpath(filename)
if (
content_type == 'application/zip' or
filename.lower().endswith(ZIP_EXTENSIONS) or
zipfile.is_zipfile(filename)
):
unzip_file(
filename,
location,
flatten=not filename.endswith('.whl')
)
elif (
content_type == 'application/x-gzip' or
tarfile.is_tarfile(filename) or
filename.lower().endswith(
TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS
)
):

# CVE-2026-3219: choose the archive format by decreasing reliability
# (content-type, then filename extension, then magic signature) and only
# trust the magic signature when it is unambiguous. The previous logic ran
# ``zipfile.is_zipfile`` for any unrecognised name; since that scans for a
# ZIP end-of-central-directory record anywhere in the file, a tar+ZIP
# polyglot (e.g. a ZIP appended to a valid ``.tar.gz``) was always treated
# as a ZIP regardless of its name.
def _unzip():
# type: () -> None
unzip_file(filename, location, flatten=not filename.endswith('.whl'))

def _untar():
# type: () -> None
untar_file(filename, location)
else:
# FIXME: handle?
# FIXME: magic signatures?

if content_type == 'application/zip':
return _unzip()
if content_type == 'application/x-gzip':
return _untar()

if filename.lower().endswith(ZIP_EXTENSIONS):
return _unzip()
if filename.lower().endswith(
TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS
):
return _untar()

# Fall back to magic signatures, but refuse the ambiguous case where the
# file is simultaneously a valid ZIP and a valid tar.
is_zip = zipfile.is_zipfile(filename)
is_tar = tarfile.is_tarfile(filename)
if is_zip and not is_tar:
return _unzip()
if is_tar and not is_zip:
return _untar()
if is_zip and is_tar:
logger.critical(
'Cannot unpack file %s (downloaded from %s, content-type: %s); '
'cannot detect archive format',
'it is ambiguously both a zip and a tar archive',
filename, location, content_type,
)
raise InstallationError(
'Cannot determine archive format of {}'.format(location)
'Ambiguous archive format (both zip and tar) of {}'.format(
location
)
)

# FIXME: handle?
# FIXME: magic signatures?
logger.critical(
'Cannot unpack file %s (downloaded from %s, content-type: %s); '
'cannot detect archive format',
filename, location, content_type,
)
raise InstallationError(
'Cannot determine archive format of {}'.format(location)
)
10 changes: 8 additions & 2 deletions src/pip/_internal/vcs/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,15 @@ def get_revision_sha(cls, dest, rev):
on_returncode='ignore',
)
refs = {}
for line in output.strip().splitlines():
# NOTE: We do not use splitlines here since that would split on other
# unicode separators, which can be maliciously used to install a
# different revision.
for line in output.strip().split("\n"):
line = line.rstrip("\r")
if not line:
continue
try:
sha, ref = line.split()
sha, ref = line.split(" ", 1)
except ValueError:
# Include the offending line to simplify troubleshooting if
# this error ever occurs.
Expand Down
5 changes: 4 additions & 1 deletion src/pip/_internal/vcs/mercurial.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ class Mercurial(VersionControl):

@staticmethod
def get_base_rev_args(rev):
return [rev]
# CVE-2023-5752: pass the revision as a single ``-r=<rev>`` token so a
# malicious revision (e.g. ``--config=...``) cannot be interpreted by
# hg as a separate command-line option.
return ["-r={}".format(rev)]

def export(self, location, url):
# type: (str, HiddenText) -> None
Expand Down
102 changes: 101 additions & 1 deletion tests/unit/test_utils_unpacking.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import os
import shutil
import stat
Expand All @@ -10,7 +11,12 @@
import pytest

from pip._internal.exceptions import InstallationError
from pip._internal.utils.unpacking import is_within_directory, untar_file, unzip_file
from pip._internal.utils.unpacking import (
is_within_directory,
unpack_file,
untar_file,
unzip_file,
)


class TestUnpackArchives(object):
Expand Down Expand Up @@ -167,6 +173,42 @@ def test_unpack_tar_success(self):
test_tar = self.make_tar_file('test_tar.tar', files)
untar_file(test_tar, self.tempdir)

def test_unpack_tar_symlink_escape_failure(self):
"""
CVE-2025-8869: a tar symlink whose target is not a member of the
archive (i.e. points to an arbitrary host path) must be rejected.
"""
test_tar = os.path.join(self.tempdir, 'evil_symlink.tar')
with tarfile.open(test_tar, 'w') as tar:
info = tarfile.TarInfo('evil_link')
info.type = tarfile.SYMTYPE
info.linkname = os.path.join('..', '..', '..', 'etc', 'passwd')
tar.addfile(info)
with pytest.raises(InstallationError) as e:
untar_file(test_tar, os.path.join(self.tempdir, 'out'))
assert 'trying to point outside target directory' in str(e.value)

def test_unpack_tar_symlink_to_member_success(self):
"""
A tar symlink pointing at another member of the same archive is safe
and is extracted unchanged.
"""
test_tar = os.path.join(self.tempdir, 'good_symlink.tar')
content = b'data\n'
with tarfile.open(test_tar, 'w') as tar:
finfo = tarfile.TarInfo('target.txt')
finfo.size = len(content)
tar.addfile(finfo, io.BytesIO(content))
linfo = tarfile.TarInfo('link.txt')
linfo.type = tarfile.SYMTYPE
linfo.linkname = 'target.txt'
tar.addfile(linfo)
out = os.path.join(self.tempdir, 'out')
untar_file(test_tar, out)
link_path = os.path.join(out, 'link.txt')
assert os.path.islink(link_path)
assert os.readlink(link_path) == 'target.txt'


@pytest.mark.parametrize('args, expected', [
# Test the second containing the first.
Expand All @@ -179,7 +221,65 @@ def test_unpack_tar_success(self):
(('parent/', 'parent/sub'), True),
# Test target outside parent
(('parent/', 'parent/../sub'), False),
# CVE-2026-1703: a sibling whose name has the directory as a string
# prefix must not be considered "within" it.
(('parent/child', 'parent/childfoo'), False),
])
def test_is_within_directory(args, expected):
result = is_within_directory(*args)
assert result == expected


def _make_tar_zip_polyglot(path):
"""Write a valid .tar.gz with a valid ZIP appended (a polyglot file).

The tar view and the zip view each contain a ``payload.txt`` with
different contents so a test can tell which format pip chose.
"""
# Use a top-level directory (as real sdists/wheels do) so pip's
# leading-directory flattening behaves the same for both views.
tar_buf = io.BytesIO()
tar = tarfile.open(fileobj=tar_buf, mode='w:gz')
data = b'from-tar'
info = tarfile.TarInfo('pkg/payload.txt')
info.size = len(data)
tar.addfile(info, io.BytesIO(data))
tar.close()

zip_buf = io.BytesIO()
zf = zipfile.ZipFile(zip_buf, 'w')
zf.writestr('pkg/payload.txt', 'from-zip')
zf.close()

with open(path, 'wb') as fp:
fp.write(tar_buf.getvalue() + zip_buf.getvalue())


@pytest.mark.parametrize('filename, content_type, expected', [
# CVE-2026-3219: a .tar.gz polyglot must be treated as a tar (by its
# name), not silently as the appended zip.
('pkg.tar.gz', None, b'from-tar'),
('pkg.tgz', None, b'from-tar'),
('pkg.zip', None, b'from-zip'),
# An explicit content-type wins over the filename.
('pkg.tar.gz', 'application/zip', b'from-zip'),
('pkg.bin', 'application/x-gzip', b'from-tar'),
])
def test_unpack_polyglot_routing(tmpdir, filename, content_type, expected):
archive = os.path.join(str(tmpdir), filename)
_make_tar_zip_polyglot(archive)
out = os.path.join(str(tmpdir), 'out')
unpack_file(archive, out, content_type=content_type)
with open(os.path.join(out, 'payload.txt'), 'rb') as fp:
assert fp.read() == expected


def test_unpack_polyglot_ambiguous_rejected(tmpdir):
"""
CVE-2026-3219: with no usable content-type or extension, a file that is
both a valid zip and a valid tar is ambiguous and must be rejected.
"""
archive = os.path.join(str(tmpdir), 'pkg.unknownext')
_make_tar_zip_polyglot(archive)
with pytest.raises(InstallationError):
unpack_file(archive, os.path.join(str(tmpdir), 'out'))
39 changes: 38 additions & 1 deletion tests/unit/test_vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_rev_options_repr():
# First check VCS-specific RevOptions behavior.
(Bazaar, [], ['-r', '123'], {}),
(Git, ['HEAD'], ['123'], {}),
(Mercurial, [], ['123'], {}),
(Mercurial, [], ['-r=123'], {}),
(Subversion, [], ['-r', '123'], {}),
# Test extra_args. For this, test using a single VersionControl class.
(Git, ['HEAD', 'opt1', 'opt2'], ['123', 'opt1', 'opt2'],
Expand Down Expand Up @@ -201,6 +201,43 @@ def test_git_is_commit_id_equal(mock_get_revision, rev_name, result):
assert Git.is_commit_id_equal('/path', rev_name) is result


@patch('pip._internal.vcs.git.Git.run_command')
def test_git_get_revision_sha_unicode_separator(run_command_mock):
"""
CVE-2021-3572: a git ref name containing a unicode line separator must
not be parsed into an extra, attacker-controlled ``show-ref`` line.
"""
real_sha = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
malicious_sha = 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
# git emits a single physical line, but the tag name embeds U+2028 followed
# by a spoofed "<sha> refs/tags/1.0" payload. ``splitlines()`` would split
# on U+2028 and create a bogus refs/tags/1.0 -> malicious_sha mapping.
run_command_mock.return_value = (
u'{real} refs/tags/legit\u2028{bad} refs/tags/1.0\n'.format(
real=real_sha, bad=malicious_sha,
)
)
sha, is_branch = Git.get_revision_sha('/path', '1.0')
# With the fix the U+2028 stays inside the single ref name, so no
# refs/tags/1.0 mapping to the malicious sha is created.
assert sha != malicious_sha
assert sha is None
assert is_branch is False


def test_mercurial_rev_not_interpreted_as_option():
"""
CVE-2023-5752: a malicious hg revision (e.g. ``--config=...``) must be
passed as a single ``-r=<rev>`` token so hg cannot parse it as an option.
"""
rev = '--config=alias.update=!touch owned'
args = Mercurial.get_base_rev_args(rev)
assert args == ['-r={}'.format(rev)]
# A single token means hg treats the whole thing as the value of -r.
assert len(args) == 1
assert args[0].startswith('-r=')


# The non-SVN backends all use the same get_netloc_and_auth(), so only test
# Git as a representative.
@pytest.mark.parametrize('args, expected', [
Expand Down
Loading