From 0a6335e4750cbbfa7fd6a7e0ae1c60aaa087800f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 11 Jul 2024 12:17:45 +0300 Subject: [PATCH 1/4] gh-120932: Check the pyc file mtime at import --- Lib/importlib/_bootstrap_external.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index bf14d57b2503ead..88316718e9f5531 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -26,6 +26,7 @@ import _imp import _io import sys +import time import _warnings import marshal @@ -725,8 +726,8 @@ def _classify_pyc(data, name, exc_details): return flags -def _validate_timestamp_pyc(data, source_mtime, source_size, name, - exc_details): +def _validate_timestamp_pyc(self, data, source_mtime, source_size, name, + bytecode_path, exc_details): """Validate a pyc against the source last-modified time. *data* is the contents of the pyc file. (Only the first 16 bytes are @@ -738,19 +739,32 @@ def _validate_timestamp_pyc(data, source_mtime, source_size, name, *name* is the name of the module being imported. It is used for logging. + *bytecode_path* is the path of the pyc file. + *exc_details* is a dictionary passed to ImportError if it raised for improved debugging. An ImportError is raised if the bytecode is stale. """ - if _unpack_uint32(data[8:12]) != (source_mtime & 0xFFFFFFFF): + timestamp = _unpack_uint32(data[8:12]) + if timestamp != (int(source_mtime) & 0xFFFFFFFF): message = f'bytecode is stale for {name!r}' _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) if (source_size is not None and _unpack_uint32(data[12:16]) != (source_size & 0xFFFFFFFF)): raise ImportError(f'bytecode is stale for {name!r}', **exc_details) + if time.time() - source_mtime < 2: + try: + bytecode_mtime = self.path_mtime(bytecode_path) + except OSError: + pass + else: + if bytecode_mtime < source_mtime: + message = f'bytecode may be stale for {name!r}' + _bootstrap._verbose_message('{}', message) + raise ImportError(message, **exc_details) def _validate_hash_pyc(data, source_hash, name, exc_details): @@ -794,7 +808,7 @@ def _code_to_timestamp_pyc(code, mtime=0, source_size=0): "Produce the data for a timestamp-based pyc." data = bytearray(MAGIC_NUMBER) data.extend(_pack_uint32(0)) - data.extend(_pack_uint32(mtime)) + data.extend(_pack_uint32(int(mtime))) data.extend(_pack_uint32(source_size)) data.extend(marshal.dumps(code)) return data @@ -1111,7 +1125,7 @@ def get_code(self, fullname): except OSError: pass else: - source_mtime = int(st['mtime']) + source_mtime = st['mtime'] try: data = self.get_data(bytecode_path) except OSError: @@ -1139,10 +1153,12 @@ def get_code(self, fullname): exc_details) else: _validate_timestamp_pyc( + self, data, source_mtime, st['size'], fullname, + bytecode_path, exc_details, ) except (ImportError, EOFError): From 5f2d87ede7d80f82b601b028b266ef2cf429df62 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 11 Jul 2024 16:00:19 +0300 Subject: [PATCH 2/4] Add a test (flawed). --- Lib/importlib/_bootstrap_external.py | 2 +- Lib/test/test_import/__init__.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 88316718e9f5531..2db8fcf3725eff2 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -757,7 +757,7 @@ def _validate_timestamp_pyc(self, data, source_mtime, source_size, name, raise ImportError(f'bytecode is stale for {name!r}', **exc_details) if time.time() - source_mtime < 2: try: - bytecode_mtime = self.path_mtime(bytecode_path) + bytecode_mtime = self.path_stats(bytecode_path)['mtime'] except OSError: pass else: diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index e29097baaf53ae1..607ee5bbfcaad3f 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1518,6 +1518,31 @@ def test_recompute_pyc_same_second(self): m = __import__(TESTFN) self.assertEqual(m.x, 5) + def test_recompute_pyc_same_second_same_size(self): + # Even when the source file doesn't change timestamp (truncated + # to seconds) and size, the difference between the source and + # the bytecode timestamps is enough to trigger recomputation of + # the pyc file. + prev_mtime = 0 + for i in range(10, 100): + with self.subTest(i): + while True: + with open(self.source, 'w', encoding='utf-8') as fp: + print(f"x = {i}", file=fp) + mtime = os.stat(self.source).st_mtime + if mtime > prev_mtime: + break + time.sleep(1e-3) + self.assertGreater(mtime, prev_mtime) + prev_mtime = mtime + + m = __import__(TESTFN) + self.assertEqual(m.x, i) + unload(TESTFN) + m = __import__(TESTFN) + self.assertEqual(m.x, i) + unload(TESTFN) + class TestSymbolicallyLinkedPackage(unittest.TestCase): package_name = 'sample' From 7e1cb8a8c92da15ad8dac566bf3ad9aadf020c56 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 12 Jul 2024 19:50:49 +0300 Subject: [PATCH 3/4] Make test more reliable. --- Lib/importlib/_bootstrap_external.py | 2 +- Lib/test/test_import/__init__.py | 39 +++++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 2db8fcf3725eff2..cbb4b6d6651b6d3 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -762,7 +762,7 @@ def _validate_timestamp_pyc(self, data, source_mtime, source_size, name, pass else: if bytecode_mtime < source_mtime: - message = f'bytecode may be stale for {name!r}' + message = f'bytecode is stale for {name!r}' _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 607ee5bbfcaad3f..1151ecd8b835b47 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1523,25 +1523,28 @@ def test_recompute_pyc_same_second_same_size(self): # to seconds) and size, the difference between the source and # the bytecode timestamps is enough to trigger recomputation of # the pyc file. - prev_mtime = 0 + pyc_file = importlib.util.cache_from_source(self.source) for i in range(10, 100): - with self.subTest(i): - while True: - with open(self.source, 'w', encoding='utf-8') as fp: - print(f"x = {i}", file=fp) - mtime = os.stat(self.source).st_mtime - if mtime > prev_mtime: - break - time.sleep(1e-3) - self.assertGreater(mtime, prev_mtime) - prev_mtime = mtime - - m = __import__(TESTFN) - self.assertEqual(m.x, i) - unload(TESTFN) - m = __import__(TESTFN) - self.assertEqual(m.x, i) - unload(TESTFN) + try: + bytecode_mtime = os.stat(pyc_file).st_mtime + except FileNotFoundError: + # The pyc file has not yet be created. + bytecode_mtime = 0 + delay = 1e-3 + for j in range(10): + time.sleep(delay) + with open(self.source, 'w', encoding='utf-8') as fp: + print(f"x = {i}", file=fp) + if os.stat(self.source).st_mtime > bytecode_mtime: + break + delay *= 2 + + m = __import__(TESTFN) + self.assertEqual(m.x, i) + unload(TESTFN) + m = __import__(TESTFN) + self.assertEqual(m.x, i) + unload(TESTFN) class TestSymbolicallyLinkedPackage(unittest.TestCase): From 4b1db69bbf1c6269bba8247964f28f10c1539cfc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 12 Jul 2024 20:02:25 +0300 Subject: [PATCH 4/4] Add a NEWS entry. --- .../2024-07-12-19-57-12.gh-issue-121620.AgNaIS.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-07-12-19-57-12.gh-issue-121620.AgNaIS.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-12-19-57-12.gh-issue-121620.AgNaIS.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-12-19-57-12.gh-issue-121620.AgNaIS.rst new file mode 100644 index 000000000000000..5a7374a49f4474b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-12-19-57-12.gh-issue-121620.AgNaIS.rst @@ -0,0 +1,2 @@ +Import checks now also the modification time of the ``.pyc`` file for +recently modified source file. This reduces chance of using stale bytecode.