diff --git a/NEWS.rst b/NEWS.rst index 18cc7b70427..140fb2dac5e 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -9,6 +9,37 @@ .. towncrier release notes start +20.3.4+security.1 (2026-06-02) +============================== + +Security Fixes +-------------- + +- **CVE-2021-3572**: Stop splitting ``git show-ref`` output on unicode + separators when resolving a git revision, preventing a crafted ref name from + spoofing the resolved revision. (`GHSA-5xp3-jfq3-5q8x `_) +- **CVE-2023-5752**: Pass a Mercurial revision as a single ``-r=`` token so + a malicious revision cannot inject ``hg`` command-line options (e.g. + ``--config``). (`GHSA-mq26-g339-26xf `_) +- **CVE-2025-8869**: Validate that tar symlink members point to other members + inside the same archive before extraction, closing a symlink-escape on the + no-``data_filter`` fallback path. (`GHSA-4xh5-x5gv-qwph `_) +- **CVE-2026-3219**: Disambiguate archive type detection in ``unpack_file`` so a + tar+zip polyglot is no longer always treated as a zip; ambiguous archives are + rejected. (`GHSA-58qw-9mgm-455v `_) +- **CVE-2026-1703**: Enforce a path-component boundary in ``is_within_directory`` + (instead of a character-prefix comparison) so an extracted file cannot land in + a sibling directory whose name is a prefix of the install path. (`GHSA-6vgw-5pg2-w6jp `_) + +.. note + + CVE-2026-6357 / GHSA-jp4c-xjxw-mgf9 was assessed and found **not applicable** + to this release: pip 20.3.4 imports the self-version-check and all of its + dependencies eagerly at startup, and the post-install check touches only + vendored (``pip._vendor.*``) modules, so a newly-installed wheel has no + well-known module name to shadow. + + 20.3.4 (2021-01-23) =================== diff --git a/src/pip/__init__.py b/src/pip/__init__.py index f9bd0632fe2..dcdcebcfcac 100644 --- a/src/pip/__init__.py +++ b/src/pip/__init__.py @@ -4,7 +4,7 @@ from typing import List, Optional -__version__ = "20.3.4" +__version__ = "20.3.4+security.1" def main(args=None): diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 620f31ebb74..2a926e648c2 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -92,8 +92,33 @@ def is_within_directory(directory, target): abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) - prefix = os.path.commonprefix([abs_directory, abs_target]) - return prefix == abs_directory + # NOTE: os.path.commonprefix compares character-by-character, so it treats + # "/a/parent" and "/a/parentfoo" as sharing the prefix "/a/parent" + # and would report the latter as "within" the former (CVE-2026-1703). + # os.path.commonpath compares on path components but is unavailable on + # Python 2.7, so enforce a component boundary explicitly instead. + return abs_target == abs_directory or abs_target.startswith( + abs_directory + os.sep + ) + + +def is_symlink_target_in_tar(tar, tarinfo): + # type: (tarfile.TarFile, tarfile.TarInfo) -> bool + """ + Return true if the file the symlink member points to is itself a member + of the same tar archive, i.e. the symlink does not escape the archive to + reference an arbitrary path on the host filesystem (CVE-2025-8869). + """ + linkname = os.path.join(os.path.dirname(tarinfo.name), tarinfo.linkname) + + linkname = os.path.normpath(linkname) + linkname = linkname.replace("\\", "/") + + try: + tar.getmember(linkname) + return True + except KeyError: + return False def set_extracted_file_to_default_mode_plus_executable(path): @@ -206,6 +231,14 @@ def untar_file(filename, location): if member.isdir(): ensure_dir(path) elif member.issym(): + if not is_symlink_target_in_tar(tar, member): + message = ( + 'The tar file ({}) has a link ({}) trying to point ' + 'outside target directory ({})' + ) + raise InstallationError( + message.format(filename, member.name, member.linkname) + ) try: # https://github.com/python/typeshed/issues/2673 tar._extract_member(member, path) # type: ignore @@ -250,32 +283,61 @@ def unpack_file( ): # type: (...) -> None filename = os.path.realpath(filename) - if ( - content_type == 'application/zip' or - filename.lower().endswith(ZIP_EXTENSIONS) or - zipfile.is_zipfile(filename) - ): - unzip_file( - filename, - location, - flatten=not filename.endswith('.whl') - ) - elif ( - content_type == 'application/x-gzip' or - tarfile.is_tarfile(filename) or - filename.lower().endswith( - TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS - ) - ): + + # CVE-2026-3219: choose the archive format by decreasing reliability + # (content-type, then filename extension, then magic signature) and only + # trust the magic signature when it is unambiguous. The previous logic ran + # ``zipfile.is_zipfile`` for any unrecognised name; since that scans for a + # ZIP end-of-central-directory record anywhere in the file, a tar+ZIP + # polyglot (e.g. a ZIP appended to a valid ``.tar.gz``) was always treated + # as a ZIP regardless of its name. + def _unzip(): + # type: () -> None + unzip_file(filename, location, flatten=not filename.endswith('.whl')) + + def _untar(): + # type: () -> None untar_file(filename, location) - else: - # FIXME: handle? - # FIXME: magic signatures? + + if content_type == 'application/zip': + return _unzip() + if content_type == 'application/x-gzip': + return _untar() + + if filename.lower().endswith(ZIP_EXTENSIONS): + return _unzip() + if filename.lower().endswith( + TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS + ): + return _untar() + + # Fall back to magic signatures, but refuse the ambiguous case where the + # file is simultaneously a valid ZIP and a valid tar. + is_zip = zipfile.is_zipfile(filename) + is_tar = tarfile.is_tarfile(filename) + if is_zip and not is_tar: + return _unzip() + if is_tar and not is_zip: + return _untar() + if is_zip and is_tar: logger.critical( 'Cannot unpack file %s (downloaded from %s, content-type: %s); ' - 'cannot detect archive format', + 'it is ambiguously both a zip and a tar archive', filename, location, content_type, ) raise InstallationError( - 'Cannot determine archive format of {}'.format(location) + 'Ambiguous archive format (both zip and tar) of {}'.format( + location + ) ) + + # FIXME: handle? + # FIXME: magic signatures? + logger.critical( + 'Cannot unpack file %s (downloaded from %s, content-type: %s); ' + 'cannot detect archive format', + filename, location, content_type, + ) + raise InstallationError( + 'Cannot determine archive format of {}'.format(location) + ) diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 565961a0631..789b5148c9f 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -149,9 +149,15 @@ def get_revision_sha(cls, dest, rev): on_returncode='ignore', ) refs = {} - for line in output.strip().splitlines(): + # NOTE: We do not use splitlines here since that would split on other + # unicode separators, which can be maliciously used to install a + # different revision. + for line in output.strip().split("\n"): + line = line.rstrip("\r") + if not line: + continue try: - sha, ref = line.split() + sha, ref = line.split(" ", 1) except ValueError: # Include the offending line to simplify troubleshooting if # this error ever occurs. diff --git a/src/pip/_internal/vcs/mercurial.py b/src/pip/_internal/vcs/mercurial.py index d2d145f623f..1fc7f0d2042 100644 --- a/src/pip/_internal/vcs/mercurial.py +++ b/src/pip/_internal/vcs/mercurial.py @@ -38,7 +38,10 @@ class Mercurial(VersionControl): @staticmethod def get_base_rev_args(rev): - return [rev] + # CVE-2023-5752: pass the revision as a single ``-r=`` token so a + # malicious revision (e.g. ``--config=...``) cannot be interpreted by + # hg as a separate command-line option. + return ["-r={}".format(rev)] def export(self, location, url): # type: (str, HiddenText) -> None diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 5c2be24d429..5162cecf94e 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -1,3 +1,4 @@ +import io import os import shutil import stat @@ -10,7 +11,12 @@ import pytest from pip._internal.exceptions import InstallationError -from pip._internal.utils.unpacking import is_within_directory, untar_file, unzip_file +from pip._internal.utils.unpacking import ( + is_within_directory, + unpack_file, + untar_file, + unzip_file, +) class TestUnpackArchives(object): @@ -167,6 +173,42 @@ def test_unpack_tar_success(self): test_tar = self.make_tar_file('test_tar.tar', files) untar_file(test_tar, self.tempdir) + def test_unpack_tar_symlink_escape_failure(self): + """ + CVE-2025-8869: a tar symlink whose target is not a member of the + archive (i.e. points to an arbitrary host path) must be rejected. + """ + test_tar = os.path.join(self.tempdir, 'evil_symlink.tar') + with tarfile.open(test_tar, 'w') as tar: + info = tarfile.TarInfo('evil_link') + info.type = tarfile.SYMTYPE + info.linkname = os.path.join('..', '..', '..', 'etc', 'passwd') + tar.addfile(info) + with pytest.raises(InstallationError) as e: + untar_file(test_tar, os.path.join(self.tempdir, 'out')) + assert 'trying to point outside target directory' in str(e.value) + + def test_unpack_tar_symlink_to_member_success(self): + """ + A tar symlink pointing at another member of the same archive is safe + and is extracted unchanged. + """ + test_tar = os.path.join(self.tempdir, 'good_symlink.tar') + content = b'data\n' + with tarfile.open(test_tar, 'w') as tar: + finfo = tarfile.TarInfo('target.txt') + finfo.size = len(content) + tar.addfile(finfo, io.BytesIO(content)) + linfo = tarfile.TarInfo('link.txt') + linfo.type = tarfile.SYMTYPE + linfo.linkname = 'target.txt' + tar.addfile(linfo) + out = os.path.join(self.tempdir, 'out') + untar_file(test_tar, out) + link_path = os.path.join(out, 'link.txt') + assert os.path.islink(link_path) + assert os.readlink(link_path) == 'target.txt' + @pytest.mark.parametrize('args, expected', [ # Test the second containing the first. @@ -179,7 +221,65 @@ def test_unpack_tar_success(self): (('parent/', 'parent/sub'), True), # Test target outside parent (('parent/', 'parent/../sub'), False), + # CVE-2026-1703: a sibling whose name has the directory as a string + # prefix must not be considered "within" it. + (('parent/child', 'parent/childfoo'), False), ]) def test_is_within_directory(args, expected): result = is_within_directory(*args) assert result == expected + + +def _make_tar_zip_polyglot(path): + """Write a valid .tar.gz with a valid ZIP appended (a polyglot file). + + The tar view and the zip view each contain a ``payload.txt`` with + different contents so a test can tell which format pip chose. + """ + # Use a top-level directory (as real sdists/wheels do) so pip's + # leading-directory flattening behaves the same for both views. + tar_buf = io.BytesIO() + tar = tarfile.open(fileobj=tar_buf, mode='w:gz') + data = b'from-tar' + info = tarfile.TarInfo('pkg/payload.txt') + info.size = len(data) + tar.addfile(info, io.BytesIO(data)) + tar.close() + + zip_buf = io.BytesIO() + zf = zipfile.ZipFile(zip_buf, 'w') + zf.writestr('pkg/payload.txt', 'from-zip') + zf.close() + + with open(path, 'wb') as fp: + fp.write(tar_buf.getvalue() + zip_buf.getvalue()) + + +@pytest.mark.parametrize('filename, content_type, expected', [ + # CVE-2026-3219: a .tar.gz polyglot must be treated as a tar (by its + # name), not silently as the appended zip. + ('pkg.tar.gz', None, b'from-tar'), + ('pkg.tgz', None, b'from-tar'), + ('pkg.zip', None, b'from-zip'), + # An explicit content-type wins over the filename. + ('pkg.tar.gz', 'application/zip', b'from-zip'), + ('pkg.bin', 'application/x-gzip', b'from-tar'), +]) +def test_unpack_polyglot_routing(tmpdir, filename, content_type, expected): + archive = os.path.join(str(tmpdir), filename) + _make_tar_zip_polyglot(archive) + out = os.path.join(str(tmpdir), 'out') + unpack_file(archive, out, content_type=content_type) + with open(os.path.join(out, 'payload.txt'), 'rb') as fp: + assert fp.read() == expected + + +def test_unpack_polyglot_ambiguous_rejected(tmpdir): + """ + CVE-2026-3219: with no usable content-type or extension, a file that is + both a valid zip and a valid tar is ambiguous and must be rejected. + """ + archive = os.path.join(str(tmpdir), 'pkg.unknownext') + _make_tar_zip_polyglot(archive) + with pytest.raises(InstallationError): + unpack_file(archive, os.path.join(str(tmpdir), 'out')) diff --git a/tests/unit/test_vcs.py b/tests/unit/test_vcs.py index d36f9f01deb..1008f19b6ac 100644 --- a/tests/unit/test_vcs.py +++ b/tests/unit/test_vcs.py @@ -52,7 +52,7 @@ def test_rev_options_repr(): # First check VCS-specific RevOptions behavior. (Bazaar, [], ['-r', '123'], {}), (Git, ['HEAD'], ['123'], {}), - (Mercurial, [], ['123'], {}), + (Mercurial, [], ['-r=123'], {}), (Subversion, [], ['-r', '123'], {}), # Test extra_args. For this, test using a single VersionControl class. (Git, ['HEAD', 'opt1', 'opt2'], ['123', 'opt1', 'opt2'], @@ -201,6 +201,43 @@ def test_git_is_commit_id_equal(mock_get_revision, rev_name, result): assert Git.is_commit_id_equal('/path', rev_name) is result +@patch('pip._internal.vcs.git.Git.run_command') +def test_git_get_revision_sha_unicode_separator(run_command_mock): + """ + CVE-2021-3572: a git ref name containing a unicode line separator must + not be parsed into an extra, attacker-controlled ``show-ref`` line. + """ + real_sha = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + malicious_sha = 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' + # git emits a single physical line, but the tag name embeds U+2028 followed + # by a spoofed " refs/tags/1.0" payload. ``splitlines()`` would split + # on U+2028 and create a bogus refs/tags/1.0 -> malicious_sha mapping. + run_command_mock.return_value = ( + u'{real} refs/tags/legit\u2028{bad} refs/tags/1.0\n'.format( + real=real_sha, bad=malicious_sha, + ) + ) + sha, is_branch = Git.get_revision_sha('/path', '1.0') + # With the fix the U+2028 stays inside the single ref name, so no + # refs/tags/1.0 mapping to the malicious sha is created. + assert sha != malicious_sha + assert sha is None + assert is_branch is False + + +def test_mercurial_rev_not_interpreted_as_option(): + """ + CVE-2023-5752: a malicious hg revision (e.g. ``--config=...``) must be + passed as a single ``-r=`` token so hg cannot parse it as an option. + """ + rev = '--config=alias.update=!touch owned' + args = Mercurial.get_base_rev_args(rev) + assert args == ['-r={}'.format(rev)] + # A single token means hg treats the whole thing as the value of -r. + assert len(args) == 1 + assert args[0].startswith('-r=') + + # The non-SVN backends all use the same get_netloc_and_auth(), so only test # Git as a representative. @pytest.mark.parametrize('args, expected', [