Minimal patch for pip diff -rU3 pip-orig/src/pip/_internal/utils/unpacking.py pip/src/pip/_internal/utils/unpacking.py --- pip-orig/src/pip/_internal/utils/unpacking.py 2022-11-05 16:25:43.000000000 +0100 +++ pip/src/pip/_internal/utils/unpacking.py 2023-08-08 13:17:47.705613554 +0200 @@ -184,6 +184,13 @@ "outside target directory ({})" ) raise InstallationError(message.format(filename, path, location)) + + # Call the `data` filter for its side effect (raising exception) + try: + tarfile.data_filter(member.replace(name=fn), location) + except tarfile.LinkOutsideDestinationError: + pass + if member.isdir(): ensure_dir(path) elif member.issym(): Test from https://github.com/pypa/pip/pull/12214 diff -rU3 pip-orig/tests/unit/test_utils_unpacking.py pip/tests/unit/test_utils_unpacking.py --- pip-orig/tests/unit/test_utils_unpacking.py 2022-11-05 16:25:43.000000000 +0100 +++ pip/tests/unit/test_utils_unpacking.py 2023-08-08 13:17:35.151540108 +0200 @@ -167,6 +167,23 @@ test_tar = self.make_tar_file('test_tar.tar', files) untar_file(test_tar, self.tempdir) + def test_unpack_tar_filter(self) -> None: + """ + Test that the tarfile.data_filter is used to disallow dangerous + behaviour (PEP-721) + """ + test_tar = os.path.join(self.tempdir, "test_tar_filter.tar") + with tarfile.open(test_tar, "w") as mytar: + file_tarinfo = tarfile.TarInfo("bad-link") + file_tarinfo.type = tarfile.SYMTYPE + file_tarinfo.linkname = "../../../../pwn" + mytar.addfile(file_tarinfo, io.BytesIO(b"")) + with pytest.raises(InstallationError) as e: + untar_file(test_tar, self.tempdir) + + assert "is outside the destination" in str(e.value) + + def test_unpack_tar_unicode(tmpdir): test_tar = tmpdir / "test.tar" Patch for vendored distlib from https://github.com/pypa/distlib/pull/201 diff --git a/distlib/util.py b/distlib/util.py index e0622e4..4349d0b 100644 --- a/src/pip/_vendor/distlib/util.py +++ b/src/pip/_vendor/distlib/util.py @@ -1249,6 +1249,19 @@ def check_path(path): for tarinfo in 
archive.getmembers(): if not isinstance(tarinfo.name, text_type): tarinfo.name = tarinfo.name.decode('utf-8') + + # Limit extraction of dangerous items, if this Python + # allows it easily. If not, just trust the input. + # See: https://docs.python.org/3/library/tarfile.html#extraction-filters + def extraction_filter(member, path): + """Run tarfile.tar_filter, but raise the expected ValueError""" + # This is only called if the current Python has tarfile filters + try: + return tarfile.tar_filter(member, path) + except tarfile.FilterError as exc: + raise ValueError(str(exc)) + archive.extraction_filter = extraction_filter + archive.extractall(dest_dir) finally: