diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 205ba7357abbcc..9e236534ae3770 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -514,8 +514,7 @@ jobs: strategy: fail-fast: false matrix: - # sanitizer: [address, undefined, memory] -- memory skipped temporarily until GH-116886 is solved. - sanitizer: [address, undefined] + sanitizer: [address, undefined, memory] steps: - name: Build fuzzers (${{ matrix.sanitizer }}) id: build diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst index 47c81f0e63603d..7a8c837307e60a 100644 --- a/Doc/library/zipimport.rst +++ b/Doc/library/zipimport.rst @@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for corresponding :file:`.pyc` file, meaning that if a ZIP archive doesn't contain :file:`.pyc` files, importing may be rather slow. +.. versionchanged:: 3.13 + ZIP64 is supported + .. versionchanged:: 3.8 Previously, ZIP archives with an archive comment were not supported. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e6234bf974ea47..5a5c506d83d735 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -700,6 +700,12 @@ xml.etree.ElementTree :func:`~xml.etree.ElementTree.iterparse` for explicit cleaning up. (Contributed by Serhiy Storchaka in :gh:`69893`.) +zipimport +--------- + +* Gains support for ZIP64 format files. Everybody loves huge code right? + (Contributed by Tim Hatch in :gh:`94146`.) + Optimizations ============= diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 4749a627c50c42..0a11dc9efc252c 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -81,6 +81,11 @@ def _pack_uint32(x): return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little') +def _unpack_uint64(data): + """Convert 8 bytes in little-endian to an integer.""" + assert len(data) == 8 + return int.from_bytes(data, 'little') + def _unpack_uint32(data): """Convert 4 bytes in little-endian to an integer.""" assert len(data) == 4 diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index a60f087ef2816e..52cb4d6e187855 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -2657,6 +2657,7 @@ def test_cli_converters(self): float() int() long() + object() Py_ssize_t() size_t() unsigned_int() diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 4bf158247fa2ec..00b415f43c49b8 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -13,6 +13,7 @@ import locale import os import pickle +import platform import select import selectors import shutil @@ -4085,9 +4086,15 @@ def test_eventfd_select(self): @unittest.skipUnless(hasattr(os, 'timerfd_create'), 'requires os.timerfd_create') @support.requires_linux_version(2, 6, 30) class TimerfdTests(unittest.TestCase): - # Tolerate a difference of 1 ms - CLOCK_RES_NS = 1_000_000 - CLOCK_RES = CLOCK_RES_NS * 1e-9 + # 1 ms accuracy is reliably achievable on every platform except Android + # emulators, where we allow 10 ms (gh-108277). + if sys.platform == "android" and platform.android_ver().is_emulator: + CLOCK_RES_PLACES = 2 + else: + CLOCK_RES_PLACES = 3 + + CLOCK_RES = 10 ** -CLOCK_RES_PLACES + CLOCK_RES_NS = 10 ** (9 - CLOCK_RES_PLACES) def timerfd_create(self, *args, **kwargs): fd = os.timerfd_create(*args, **kwargs) @@ -4109,18 +4116,18 @@ def test_timerfd_initval(self): # 1st call next_expiration, interval2 = os.timerfd_settime(fd, initial=initial_expiration, interval=interval) - self.assertAlmostEqual(interval2, 0.0, places=3) - self.assertAlmostEqual(next_expiration, 0.0, places=3) + self.assertAlmostEqual(interval2, 0.0, places=self.CLOCK_RES_PLACES) + self.assertAlmostEqual(next_expiration, 0.0, places=self.CLOCK_RES_PLACES) # 2nd call next_expiration, interval2 = os.timerfd_settime(fd, initial=initial_expiration, interval=interval) - self.assertAlmostEqual(interval2, interval, places=3) - self.assertAlmostEqual(next_expiration, initial_expiration, places=3) + self.assertAlmostEqual(interval2, interval, places=self.CLOCK_RES_PLACES) + self.assertAlmostEqual(next_expiration, initial_expiration, places=self.CLOCK_RES_PLACES) # timerfd_gettime next_expiration, interval2 = os.timerfd_gettime(fd) - self.assertAlmostEqual(interval2, interval, places=3) - self.assertAlmostEqual(next_expiration, initial_expiration, places=3) + self.assertAlmostEqual(interval2, interval, places=self.CLOCK_RES_PLACES) + self.assertAlmostEqual(next_expiration, initial_expiration, places=self.CLOCK_RES_PLACES) def test_timerfd_non_blocking(self): fd = self.timerfd_create(time.CLOCK_REALTIME, flags=os.TFD_NONBLOCK) @@ -4174,8 +4181,8 @@ def test_timerfd_interval(self): # timerfd_gettime next_expiration, interval2 = os.timerfd_gettime(fd) - self.assertAlmostEqual(interval2, interval, places=3) - self.assertAlmostEqual(next_expiration, initial_expiration, places=3) + self.assertAlmostEqual(interval2, interval, places=self.CLOCK_RES_PLACES) + self.assertAlmostEqual(next_expiration, initial_expiration, places=self.CLOCK_RES_PLACES) count = 3 t = time.perf_counter() @@ -4206,8 +4213,8 @@ def test_timerfd_TFD_TIMER_ABSTIME(self): # timerfd_gettime # Note: timerfd_gettime returns relative values even if TFD_TIMER_ABSTIME is specified. next_expiration, interval2 = os.timerfd_gettime(fd) - self.assertAlmostEqual(interval2, interval, places=3) - self.assertAlmostEqual(next_expiration, offset, places=3) + self.assertAlmostEqual(interval2, interval, places=self.CLOCK_RES_PLACES) + self.assertAlmostEqual(next_expiration, offset, places=self.CLOCK_RES_PLACES) t = time.perf_counter() count_signaled = self.read_count_signaled(fd) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index c12798d221e9b7..ae49700294330c 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -128,6 +128,10 @@ def makeZip(self, files, zipName=TEMP_ZIP, **kw): f.write(stuff) f.write(data) + def getZip64Files(self): + # This is the simplest way to make zipfile generate the zip64 EOCD block + return {f"f{n}.py": (NOW, test_src) for n in range(65537)} + def doTest(self, expected_ext, files, *modules, **kw): self.makeZip(files, **kw) @@ -798,6 +802,14 @@ def testLargestPossibleComment(self): files = {TESTMOD + ".py": (NOW, test_src)} self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1)) + def testZip64(self): + files = self.getZip64Files() + self.doTest(".py", files, "f6") + + def testZip64CruftAndComment(self): + files = self.getZip64Files() + self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1)) + @support.requires_zlib() class CompressedZipImportTestCase(UncompressedZipImportTestCase): diff --git a/Lib/zipimport.py b/Lib/zipimport.py index 823a82ee830465..21d2dca46f569b 100644 --- a/Lib/zipimport.py +++ b/Lib/zipimport.py @@ -15,7 +15,7 @@ #from importlib import _bootstrap_external #from importlib import _bootstrap # for _verbose_message import _frozen_importlib_external as _bootstrap_external -from _frozen_importlib_external import _unpack_uint16, _unpack_uint32 +from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64 import _frozen_importlib as _bootstrap # for _verbose_message import _imp # for check_hash_based_pycs import _io # for open @@ -40,8 +40,14 @@ class ZipImportError(ImportError): _module_type = type(sys) END_CENTRAL_DIR_SIZE = 22 -STRING_END_ARCHIVE = b'PK\x05\x06' +END_CENTRAL_DIR_SIZE_64 = 56 +END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20 +STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature +STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature +STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature MAX_COMMENT_LEN = (1 << 16) - 1 +MAX_UINT32 = 0xffffffff +ZIP64_EXTRA_TAG = 0x1 class zipimporter(_bootstrap_external._LoaderBasics): """zipimporter(archivepath) -> zipimporter object @@ -356,49 +362,72 @@ def _read_directory(archive): # to not cause problems when some runs 'python3 /dev/fd/9 9= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos): + # Zip64 at "correct" offset from standard EOCD + buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64] + if len(buffer) != END_CENTRAL_DIR_SIZE_64: + raise ZipImportError( + f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte " + f"zip64 central directory, but read {len(buffer)} bytes.", + path=archive) + header_position = file_size - len(data) + pos64 + + central_directory_size = _unpack_uint64(buffer[40:48]) + central_directory_position = _unpack_uint64(buffer[48:56]) + num_entries = _unpack_uint64(buffer[24:32]) + elif pos >= 0: buffer = data[pos:pos+END_CENTRAL_DIR_SIZE] if len(buffer) != END_CENTRAL_DIR_SIZE: raise ZipImportError(f"corrupt Zip file: {archive!r}", path=archive) + header_position = file_size - len(data) + pos - header_size = _unpack_uint32(buffer[12:16]) - header_offset = _unpack_uint32(buffer[16:20]) - if header_position < header_size: + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + central_directory_size = _unpack_uint32(buffer[12:16]) + central_directory_position = _unpack_uint32(buffer[16:20]) + num_entries = _unpack_uint16(buffer[8:10]) + + # N.b. if someday you want to prefer the standard (non-zip64) EOCD, + # you need to adjust position by 76 for arc to be 0. + else: + raise ZipImportError(f'not a Zip file: {archive!r}', + path=archive) + + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + # XXX: These are cursory checks but are not as exact or strict as they + # could be. Checking the arc-adjusted value is probably good too. + if header_position < central_directory_size: raise ZipImportError(f'bad central directory size: {archive!r}', path=archive) - if header_position < header_offset: + if header_position < central_directory_position: raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive) - header_position -= header_size - arc_offset = header_position - header_offset + header_position -= central_directory_size + # On just-a-zipfile these values are the same and arc_offset is zero; if + # the file has some bytes prepended, `arc_offset` is the number of such + # bytes. This is used for pex as well as self-extracting .exe. + arc_offset = header_position - central_directory_position if arc_offset < 0: raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive) @@ -415,6 +444,11 @@ def _read_directory(archive): raise EOFError('EOF read where not expected') # Start of file header if buffer[:4] != b'PK\x01\x02': + if count != num_entries: + raise ZipImportError( + f"mismatched num_entries: {count} should be {num_entries} in {archive!r}", + path=archive, + ) break # Bad: Central Dir File Header if len(buffer) != 46: raise EOFError('EOF read where not expected') @@ -430,9 +464,6 @@ def _read_directory(archive): comment_size = _unpack_uint16(buffer[32:34]) file_offset = _unpack_uint32(buffer[42:46]) header_size = name_size + extra_size + comment_size - if file_offset > header_offset: - raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) - file_offset += arc_offset try: name = fp.read(name_size) @@ -444,7 +475,10 @@ def _read_directory(archive): # slower than reading the data because fseek flushes stdio's # internal buffers. See issue #8745. try: - if len(fp.read(header_size - name_size)) != header_size - name_size: + extra_data_len = header_size - name_size + extra_data = memoryview(fp.read(extra_data_len)) + + if len(extra_data) != extra_data_len: raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) except OSError: raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) @@ -461,6 +495,60 @@ def _read_directory(archive): name = name.replace('/', path_sep) path = _bootstrap_external._path_join(archive, name) + + # Ordering matches unpacking below. + if ( + file_size == MAX_UINT32 or + data_size == MAX_UINT32 or + file_offset == MAX_UINT32 + ): + # need to decode extra_data looking for a zip64 extra (which might not + # be present) + while extra_data: + if len(extra_data) < 4: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + tag = _unpack_uint16(extra_data[:2]) + size = _unpack_uint16(extra_data[2:4]) + if len(extra_data) < 4 + size: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + if tag == ZIP64_EXTRA_TAG: + if (len(extra_data) - 4) % 8 != 0: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + num_extra_values = (len(extra_data) - 4) // 8 + if num_extra_values > 3: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q", + extra_data, offset=4) + + # N.b. Here be dragons: the ordering of these is different than + # the header fields, and it's really easy to get it wrong since + # naturally-occuring zips that use all 3 are >4GB + if file_size == MAX_UINT32: + file_size = values.pop(0) + if data_size == MAX_UINT32: + data_size = values.pop(0) + if file_offset == MAX_UINT32: + file_offset = values.pop(0) + + break + + # For a typical zip, this bytes-slicing only happens 2-3 times, on + # small data like timestamps and filesizes. + extra_data = extra_data[4+size:] + else: + _bootstrap._verbose_message( + "zipimport: suspected zip64 but no zip64 extra for {!r}", + path, + ) + # XXX These two statements seem swapped because `central_directory_position` + # is a position within the actual file, but `file_offset` (when compared) is + # as encoded in the entry, not adjusted for this file. + # N.b. this must be after we've potentially read the zip64 extra which can + # change `file_offset`. + if file_offset > central_directory_position: + raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) + file_offset += arc_offset + t = (path, compress, data_size, file_size, file_offset, time, date, crc) files[name] = t count += 1 diff --git a/Misc/NEWS.d/next/Library/2022-06-22-14-45-32.gh-issue-89739.CqZcRL.rst b/Misc/NEWS.d/next/Library/2022-06-22-14-45-32.gh-issue-89739.CqZcRL.rst new file mode 100644 index 00000000000000..0358c0107cb697 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-06-22-14-45-32.gh-issue-89739.CqZcRL.rst @@ -0,0 +1 @@ +The :mod:`zipimport` module can now read ZIP64 files. diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 922c927b37d249..07db46b09ae5f5 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -1584,14 +1584,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "c23ac158b238c368389dc86bfc315263e5c0e57785da74144aea2cab9a3d51a2" + "checksumValue": "e31e4ca10da91c585793c0eaf1b98aee3cb43e3a58d3d8d478593e5a6bd82927" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/521af282fdf6d60227335120f18ae9309a4b8e8c.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:521af282fdf6d60227335120f18ae9309a4b8e8c:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1599,7 +1599,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "521af282fdf6d60227335120f18ae9309a4b8e8c" + "versionInfo": "bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0" }, { "SPDXID": "SPDXRef-PACKAGE-libb2", diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 6aa4946ee227e7..5c1851f09338a0 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -183,6 +183,20 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: ), ) + # HACL* specifies its expected rev in a refresh script. + if package["name"] == "hacl-star": + hacl_refresh_sh = (CPYTHON_ROOT_DIR / "Modules/_hacl/refresh.sh").read_text() + hacl_expected_rev_match = re.search( + r"expected_hacl_star_rev=([0-9a-f]{40})", + hacl_refresh_sh + ) + hacl_expected_rev = hacl_expected_rev_match and hacl_expected_rev_match.group(1) + + error_if( + hacl_expected_rev != version, + "HACL* SBOM version doesn't match value in 'Modules/_hacl/refresh.sh'" + ) + # License must be on the approved list for SPDX. license_concluded = package["licenseConcluded"] error_if( diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index c9641cb9c82bf7..ea480e61ba9a2b 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -56,7 +56,7 @@ from libclinic.block_parser import Block, BlockParser from libclinic.crenderdata import CRenderData, Include, TemplateDict from libclinic.converter import ( - CConverter, CConverterClassT, + CConverter, CConverterClassT, ConverterType, converters, legacy_converters) @@ -1988,13 +1988,38 @@ def parse_file( libclinic.write_file(output, cooked) +@functools.cache +def _create_parser_base_namespace() -> dict[str, Any]: + ns = dict( + CConverter=CConverter, + CReturnConverter=CReturnConverter, + buffer=buffer, + robuffer=robuffer, + rwbuffer=rwbuffer, + unspecified=unspecified, + NoneType=NoneType, + ) + for name, converter in converters.items(): + ns[f'{name}_converter'] = converter + for name, return_converter in return_converters.items(): + ns[f'{name}_return_converter'] = return_converter + return ns + + +def create_parser_namespace() -> dict[str, Any]: + base_namespace = _create_parser_base_namespace() + return base_namespace.copy() + + + class PythonParser: def __init__(self, clinic: Clinic) -> None: pass def parse(self, block: Block) -> None: + namespace = create_parser_namespace() with contextlib.redirect_stdout(io.StringIO()) as s: - exec(block.input) + exec(block.input, namespace) block.output = s.getvalue() @@ -3443,7 +3468,6 @@ class float_return_converter(double_return_converter): def eval_ast_expr( node: ast.expr, - globals: dict[str, Any], *, filename: str = '-' ) -> Any: @@ -3460,8 +3484,9 @@ def eval_ast_expr( node = node.value expr = ast.Expression(node) + namespace = create_parser_namespace() co = compile(expr, filename, 'eval') - fn = FunctionType(co, globals) + fn = FunctionType(co, namespace) return fn() @@ -4463,12 +4488,11 @@ def parse_converter( case ast.Name(name): return name, False, {} case ast.Call(func=ast.Name(name)): - symbols = globals() kwargs: ConverterArgs = {} for node in annotation.keywords: if not isinstance(node.arg, str): fail("Cannot use a kwarg splat in a function-call annotation") - kwargs[node.arg] = eval_ast_expr(node.value, symbols) + kwargs[node.arg] = eval_ast_expr(node.value) return name, False, kwargs case _: fail( @@ -4984,25 +5008,21 @@ def run_clinic(parser: argparse.ArgumentParser, ns: argparse.Namespace) -> None: parser.error( "can't specify --converters and a filename at the same time" ) - converters: list[tuple[str, str]] = [] - return_converters: list[tuple[str, str]] = [] - ignored = set(""" - add_c_converter - add_c_return_converter - add_default_legacy_c_converter - add_legacy_c_converter - """.strip().split()) - module = globals() - for name in module: - for suffix, ids in ( - ("_return_converter", return_converters), - ("_converter", converters), - ): - if name in ignored: - continue - if name.endswith(suffix): - ids.append((name, name.removesuffix(suffix))) - break + AnyConverterType = ConverterType | ReturnConverterType + converter_list: list[tuple[str, AnyConverterType]] = [] + return_converter_list: list[tuple[str, AnyConverterType]] = [] + + for name, converter in converters.items(): + converter_list.append(( + name, + converter, + )) + for name, return_converter in return_converters.items(): + return_converter_list.append(( + name, + return_converter + )) + print() print("Legacy converters:") @@ -5012,15 +5032,17 @@ def run_clinic(parser: argparse.ArgumentParser, ns: argparse.Namespace) -> None: print() for title, attribute, ids in ( - ("Converters", 'converter_init', converters), - ("Return converters", 'return_converter_init', return_converters), + ("Converters", 'converter_init', converter_list), + ("Return converters", 'return_converter_init', return_converter_list), ): print(title + ":") + + ids.sort(key=lambda item: item[0].lower()) longest = -1 - for name, short_name in ids: - longest = max(longest, len(short_name)) - for name, short_name in sorted(ids, key=lambda x: x[1].lower()): - cls = module[name] + for name, _ in ids: + longest = max(longest, len(name)) + + for name, cls in ids: callable = getattr(cls, attribute, None) if not callable: continue @@ -5033,7 +5055,7 @@ def run_clinic(parser: argparse.ArgumentParser, ns: argparse.Namespace) -> None: else: s = parameter_name parameters.append(s) - print(' {}({})'.format(short_name, ', '.join(parameters))) + print(' {}({})'.format(name, ', '.join(parameters))) print() print("All converters also accept (c_default=None, py_default=None, annotation=None).") print("All return converters also accept (py_default=None).")