Skip to content

Commit 2e6028a

Browse files
gh-84353: Enable metadata_encoding in writing and appending to ZipFile
1 parent 00ea776 commit 2e6028a

3 files changed

Lines changed: 33 additions & 11 deletions

File tree

117 Bytes
Binary file not shown.

Lib/test/test_zipfile/test_core.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,6 +1959,28 @@ def test_write_unicode_filenames(self):
19591959
self.assertEqual(zf.filelist[0].filename, "foo.txt")
19601960
self.assertEqual(zf.filelist[1].filename, "\xf6.txt")
19611961

1962+
@requires_subprocess()
def test_add_comment_to_cp437_zip(self):
    """GH-84353 follow-on regression test.

    Open a copy of a cp437-named archive in append mode with
    ``metadata_encoding='cp437'``, set an archive comment, and check
    that the member name still reads back and that ``unzip -t``
    accepts the rewritten archive.
    """
    import shutil
    # subprocess.call() raises OSError (FileNotFoundError) when the
    # executable is missing instead of returning a non-zero status, so
    # both outcomes must lead to a skip rather than a test error.
    try:
        unzip_unusable = subprocess.call(
            ["unzip", "-v"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        unzip_unusable = True
    if unzip_unusable:
        self.skipTest("InfoZip unzip command not in PATH")
    fname = findfile('cp437-local-header.zip', subdir='archivetestdata')
    with temp_dir() as tmpdir:
        test_zip = shutil.copy(fname, tmpdir)
        with zipfile.ZipFile(test_zip, "a", metadata_encoding='cp437') as zipfp:
            self.assertEqual(['«HOTDOG»'], zipfp.namelist())
            zipfp.comment = b"bun"
        # When the bug is present, test_zip is now corrupt.
        # Its local header and central header differ.
        with zipfile.ZipFile(test_zip, "r") as zipfp:
            self.assertEqual(['«HOTDOG»'], zipfp.namelist())
        # unzip -t validates local and central header consistency.
        # TODO: Could we write our own code to check the same thing
        # using zipfile internals? External validation is nice.
        unzip = subprocess.run(
            ["unzip", "-t", test_zip],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        self.assertEqual(unzip.returncode, 0, msg=unzip.stdout.decode())
1983+
19621984
def create_zipfile_with_extra_data(self, filename, extra_data_name):
19631985
with zipfile.ZipFile(TESTFN, mode='w') as zf:
19641986
filename_encoded = filename.encode("utf-8")

Lib/zipfile/__init__.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ def __repr__(self):
515515
result.append('>')
516516
return ''.join(result)
517517

518-
def FileHeader(self, zip64=None):
518+
def FileHeader(self, zip64=None, metadata_encoding=None):
519519
"""Return the per-file header as a bytes object.
520520
521521
When the optional zip64 arg is None rather than a bool, we will
@@ -557,17 +557,17 @@ def FileHeader(self, zip64=None):
557557

558558
self.extract_version = max(min_version, self.extract_version)
559559
self.create_version = max(min_version, self.create_version)
560-
filename, flag_bits = self._encodeFilenameFlags()
560+
filename, flag_bits = self._encodeFilenameFlags(metadata_encoding)
561561
header = struct.pack(structFileHeader, stringFileHeader,
562562
self.extract_version, self.reserved, flag_bits,
563563
self.compress_type, dostime, dosdate, CRC,
564564
compress_size, file_size,
565565
len(filename), len(extra))
566566
return header + filename + extra
567567

568-
def _encodeFilenameFlags(self):
568+
def _encodeFilenameFlags(self, metadata_encoding=None):
    """Return ``(encoded_filename, flag_bits)`` for writing a header.

    metadata_encoding is the codec to try for the filename; when it is
    None (or otherwise falsy) 'ascii' is tried.  If the filename cannot
    be encoded with that codec it is encoded as UTF-8 and the returned
    flag bits have _MASK_UTF_FILENAME set.

    The parameter defaults to None so that callers written against the
    earlier zero-argument form keep working.
    """
    # An entry already flagged as UTF-8 must stay UTF-8: encoding it with
    # a legacy codec while leaving the flag set would produce a header
    # whose name bytes do not match its declared encoding.
    if self.flag_bits & _MASK_UTF_FILENAME:
        return self.filename.encode('utf-8'), self.flag_bits
    try:
        return self.filename.encode(metadata_encoding or 'ascii'), self.flag_bits
    except UnicodeEncodeError:
        return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
573573

@@ -1370,7 +1370,7 @@ def close(self):
13701370
# Preserve current position in file
13711371
self._zipfile.start_dir = self._fileobj.tell()
13721372
self._fileobj.seek(self._zinfo.header_offset)
1373-
self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1373+
self._fileobj.write(self._zinfo.FileHeader(self._zip64, self.metadata_encoding))
13741374
self._fileobj.seek(self._zipfile.start_dir)
13751375

13761376
# Successfully written: Add file to our caches
@@ -1435,9 +1435,9 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
14351435
self.metadata_encoding = metadata_encoding
14361436

14371437
# Check that we don't try to write with nonconforming codecs
1438-
if self.metadata_encoding and mode != 'r':
1439-
raise ValueError(
1440-
"metadata_encoding is only supported for reading files")
1438+
# if self.metadata_encoding and mode != 'r':
1439+
# raise ValueError(
1440+
# "metadata_encoding is only supported for reading files")
14411441

14421442
# Check if we were passed a file-like object
14431443
if isinstance(file, os.PathLike):
@@ -1830,7 +1830,7 @@ def _open_to_write(self, zinfo, force_zip64=False):
18301830
self._writecheck(zinfo)
18311831
self._didModify = True
18321832

1833-
self.fp.write(zinfo.FileHeader(zip64))
1833+
self.fp.write(zinfo.FileHeader(zip64, self.metadata_encoding))
18341834

18351835
self._writing = True
18361836
return _ZipWriteFile(self, zinfo, zip64)
@@ -2062,7 +2062,7 @@ def mkdir(self, zinfo_or_directory_name, mode=511):
20622062

20632063
self.filelist.append(zinfo)
20642064
self.NameToInfo[zinfo.filename] = zinfo
2065-
self.fp.write(zinfo.FileHeader(False))
2065+
self.fp.write(zinfo.FileHeader(False, self.metadata_encoding))
20662066
self.start_dir = self.fp.tell()
20672067

20682068
def __del__(self):
@@ -2133,7 +2133,7 @@ def _write_end_record(self):
21332133

21342134
extract_version = max(min_version, zinfo.extract_version)
21352135
create_version = max(min_version, zinfo.create_version)
2136-
filename, flag_bits = zinfo._encodeFilenameFlags()
2136+
filename, flag_bits = zinfo._encodeFilenameFlags(self.metadata_encoding)
21372137
centdir = struct.pack(structCentralDir,
21382138
stringCentralDir, create_version,
21392139
zinfo.create_system, extract_version, zinfo.reserved,

0 commit comments

Comments
 (0)