diff --git a/Lib/test/archivetestdata/cp437-local-header.zip b/Lib/test/archivetestdata/cp437-local-header.zip new file mode 100644 index 00000000000000..81160520fdbdd4 Binary files /dev/null and b/Lib/test/archivetestdata/cp437-local-header.zip differ diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 0d407371f40a0f..4dd534095307db 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1959,6 +1959,28 @@ def test_write_unicode_filenames(self): self.assertEqual(zf.filelist[0].filename, "foo.txt") self.assertEqual(zf.filelist[1].filename, "\xf6.txt") + @requires_subprocess() + def test_add_comment_to_cp437_zip(self): + """GH-84353 follow-on regression test.""" + import shutil + if subprocess.call(["unzip", "-v"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL): + self.skipTest("InfoZip unzip command not in PATH") + fname = findfile('cp437-local-header.zip', subdir='archivetestdata') + with temp_dir() as tmpdir: + test_zip = shutil.copy(fname, tmpdir) + with zipfile.ZipFile(test_zip, "a", metadata_encoding='cp437') as zipfp: + self.assertEqual(['«HOTDOG»'], zipfp.namelist()) + zipfp.comment = b"bun" + # When the bug is present, test_zip is now corrupt. + # Its local header and central header differ. + with zipfile.ZipFile(test_zip, "r") as zipfp: + self.assertEqual(['«HOTDOG»'], zipfp.namelist()) + # unzip -t validates local and central header consistency. + # TODO: Could we write our own code to check the same thing + # using zipfile internals? External validation is nice. + unzip = subprocess.run(["unzip", "-t", test_zip], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + self.assertEqual(unzip.returncode, 0, msg=unzip.stdout.decode()) + def create_zipfile_with_extra_data(self, filename, extra_data_name): with zipfile.ZipFile(TESTFN, mode='w') as zf: filename_encoded = filename.encode("utf-8") diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 86c3bc36b695c7..25c29d3827be7d 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -515,7 +515,7 @@ def __repr__(self): result.append('>') return ''.join(result) - def FileHeader(self, zip64=None): + def FileHeader(self, zip64=None, metadata_encoding=None): """Return the per-file header as a bytes object. When the optional zip64 arg is None rather than a bool, we will @@ -557,7 +557,7 @@ def FileHeader(self, zip64=None): self.extract_version = max(min_version, self.extract_version) self.create_version = max(min_version, self.create_version) - filename, flag_bits = self._encodeFilenameFlags() + filename, flag_bits = self._encodeFilenameFlags(metadata_encoding) header = struct.pack(structFileHeader, stringFileHeader, self.extract_version, self.reserved, flag_bits, self.compress_type, dostime, dosdate, CRC, @@ -565,9 +565,9 @@ def FileHeader(self, zip64=None): len(filename), len(extra)) return header + filename + extra - def _encodeFilenameFlags(self): + def _encodeFilenameFlags(self, metadata_encoding): try: - return self.filename.encode('ascii'), self.flag_bits + return self.filename.encode(metadata_encoding or 'ascii'), self.flag_bits except UnicodeEncodeError: return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME @@ -1370,7 +1370,7 @@ def close(self): # Preserve current position in file self._zipfile.start_dir = self._fileobj.tell() self._fileobj.seek(self._zinfo.header_offset) - self._fileobj.write(self._zinfo.FileHeader(self._zip64)) + self._fileobj.write(self._zinfo.FileHeader(self._zip64, self.metadata_encoding)) self._fileobj.seek(self._zipfile.start_dir) # Successfully written: Add file to our caches @@ -1435,9 +1435,9 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self.metadata_encoding = metadata_encoding # Check that we don't try to write with nonconforming codecs - if self.metadata_encoding and mode != 'r': - raise ValueError( - "metadata_encoding is only supported for reading files") + # if self.metadata_encoding and mode != 'r': + # raise ValueError( + # "metadata_encoding is only supported for reading files") # Check if we were passed a file-like object if isinstance(file, os.PathLike): @@ -1830,7 +1830,7 @@ def _open_to_write(self, zinfo, force_zip64=False): self._writecheck(zinfo) self._didModify = True - self.fp.write(zinfo.FileHeader(zip64)) + self.fp.write(zinfo.FileHeader(zip64, self.metadata_encoding)) self._writing = True return _ZipWriteFile(self, zinfo, zip64) @@ -2062,7 +2062,7 @@ def mkdir(self, zinfo_or_directory_name, mode=511): self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo - self.fp.write(zinfo.FileHeader(False)) + self.fp.write(zinfo.FileHeader(False, self.metadata_encoding)) self.start_dir = self.fp.tell() def __del__(self): @@ -2133,7 +2133,7 @@ def _write_end_record(self): extract_version = max(min_version, zinfo.extract_version) create_version = max(min_version, zinfo.create_version) - filename, flag_bits = zinfo._encodeFilenameFlags() + filename, flag_bits = zinfo._encodeFilenameFlags(self.metadata_encoding) centdir = struct.pack(structCentralDir, stringCentralDir, create_version, zinfo.create_system, extract_version, zinfo.reserved,