From b730d3fa29d0fb08167e91ac55df51257df96e08 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Sun, 2 Feb 2020 10:42:54 +0000 Subject: [PATCH 1/6] bpo-12915: Added pkgutil.resolve_name(). --- Doc/library/pkgutil.rst | 39 +++++++++++ Lib/pkgutil.py | 70 +++++++++++++++++++ Lib/test/test_pkgutil.py | 36 ++++++++++ .../2020-02-02-10-08-25.bpo-12915.d6r50-.rst | 4 ++ 4 files changed, 149 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index 78a515734585837..d40a7784c53d874 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -227,3 +227,42 @@ support. then ``None`` is returned. In particular, the :term:`loader` for :term:`namespace packages <namespace package>` does not support :meth:`get_data <importlib.abc.ResourceLoader.get_data>`. + + +.. function:: resolve_name(name) + + Resolves a name to an object. This is used in numerous places in the stdlib + (see bpo-12915) - and also in widely-used third-party packages such as + setuptools. + + It is expected that `name` will be a string in one of the following + formats, where W is shorthand for a valid Python identifier and dot stands + for a literal period in these pseudo-regexes: + + * W(.W)* + * W(.W)*:(W(.W)*)? + + The first form is intended for backward compatibility only. It assumes that + some part of the dotted name is a package, and the rest is an object + somewhere within that package, possibly nested inside other objects. + Because the place where the package stops and the object hierarchy starts + can't be inferred by inspection, repeated attempts to import must be done + with this form. + + In the second form, the caller makes the division point clear through the + provision of a single colon: the dotted name to the left of the colon is a + package to be imported, and the dotted name to the right is the object + hierarchy within that package. Only one import is needed in this form. 
If + it ends with the colon, then a module object is returned. + + The function will return an object (which might be a module), or raise one + of the following exceptions: + + :exc:`ValueError` -- if `name` isn't in a recognised format. + + :exc:`ImportError` -- if an import failed when it shouldn't have. + + :exc:`AttributeError` -- If a failure occurred when traversing the object + hierarchy within the imported package to get to the desired object. + + .. versionadded:: 3.9 diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index 8474a773e7c7322..bd16b9984184a85 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -7,6 +7,7 @@ import importlib.machinery import os import os.path +import re import sys from types import ModuleType import warnings @@ -635,3 +636,72 @@ def get_data(package, resource): parts.insert(0, os.path.dirname(mod.__file__)) resource_name = os.path.join(*parts) return loader.get_data(resource_name) + + +_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*' +_NAME_PATTERN = re.compile('^(%s)(:(%s)?)?$' % (_DOTTED_WORDS, + _DOTTED_WORDS), re.I) +del _DOTTED_WORDS + +def resolve_name(name): + """ + Resolves a name to an object. See bpo-12915 for the background. + + It is expected that `name` will be a string in one of the following + formats, where W is shorthand for a valid Python identifier and dot stands + for a literal period in these pseudo-regexes: + + W(.W)* + W(.W)*:(W(.W)*)? + + The first form is intended for backward compatibility only. It assumes that + some part of the dotted name is a package, and the rest is an object + somewhere within that package, possibly nested inside other objects. + Because the place where the package stops and the object hierarchy starts + can't be inferred by inspection, repeated attempts to import must be done + with this form. 
+ + In the second form, the caller makes the division point clear through the + provision of a single colon: the dotted name to the left of the colon is a + package to be imported, and the dotted name to the right is the object + hierarchy within that package. Only one import is needed in this form. If + it ends with the colon, then a module object is returned. + + The function will return an object (which might be a module), or raise one + of the following exceptions: + + ValueError - if `name` isn't in a recognised format + ImportError - if an import failed when it shouldn't have + AttributeError - if a failure occurred when traversing the object hierarchy + within the imported package to get to the desired object + """ + m = _NAME_PATTERN.match(name) + if not m: + raise ValueError('invalid format: %r' % name) + groups = m.groups() + if groups[2]: + # there is a colon - a one-step import is all that's needed + mod = importlib.import_module(groups[0]) + parts = groups[3].split('.') if groups[3] else [] + else: + # no colon - have to iterate to find the package boundary + parts = name.split('.') + modname = parts.pop(0) + # first part *must* be a module/package. + mod = importlib.import_module(modname) + while parts: + p = parts[0] + s = '%s.%s' % (modname, p) + try: + mod = importlib.import_module(s) + parts.pop(0) + modname = s + except ImportError: + break + # if we reach this point, mod is the module, already imported, and + # parts is the list of parts in the object hierarchy to be traversed, or + # an empty list if just the module is wanted. 
+ result = mod + for p in parts: + result = getattr(result, p) + return result diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 2887ce6cc055dab..774e1a7555ec103 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -186,6 +186,42 @@ def test_walk_packages_raises_on_string_or_bytes_input(self): with self.assertRaises((TypeError, ValueError)): list(pkgutil.walk_packages(bytes_input)) + def test_name_resolution(self): + import logging + import logging.handlers + + cases = ( + (None, TypeError), + (1, TypeError), + (2.0, TypeError), + (True, TypeError), + ('', ValueError), + ('?abc', ValueError), + ('abc/foo', ValueError), + ('foo', ImportError), + ('os', os), + ('os.path', os.path), + ('os.foo', AttributeError), + ('os.foo:', ImportError), + ('os.pth:pathsep', ImportError), + ('os.path:pathsep', os.path.pathsep), + ('logging', logging), + ('logging:', logging), + ('logging.handlers', logging.handlers), + ('logging.handlers:', logging.handlers), + ('logging.handlers:SysLogHandler', logging.handlers.SysLogHandler), + ('logging.handlers:SysLogHandler.LOG_ALERT', + logging.handlers.SysLogHandler.LOG_ALERT), + ('logging.handlers:NoSuchHandler', AttributeError), + ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError), + ) + + for s, e_or_o in cases: + try: + o = pkgutil.resolve_name(s) + self.assertEqual(o, e_or_o) + except Exception as e: + self.assertIsInstance(e, e_or_o) class PkgutilPEP302Tests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst b/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst new file mode 100644 index 000000000000000..90ee0bcac7915f3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst @@ -0,0 +1,4 @@ +A new function ``resolve_name`` has been added to the ``pkgutil`` module. +This resolves a string of the form ``'a.b.c.d'`` or ``'a.b:c.d'`` to an +object. 
In the example, ``a.b`` is a package/module and ``c.d`` is an object +within that package/module reached via recursive attribute access. From b65b9e740772477b1e7c650996344ec91c1e39c2 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Sun, 2 Feb 2020 14:17:13 +0000 Subject: [PATCH 2/6] Fixed typo in documentation. --- Doc/library/pkgutil.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index d40a7784c53d874..f144b21fdcd3006 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -235,7 +235,7 @@ support. (see bpo-12915) - and also in widely-used third-party packages such as setuptools. - It is expected that `name` will be a string in one of the following + It is expected that *name* will be a string in one of the following formats, where W is shorthand for a valid Python identifier and dot stands for a literal period in these pseudo-regexes: @@ -258,7 +258,7 @@ support. The function will return an object (which might be a module), or raise one of the following exceptions: - :exc:`ValueError` -- if `name` isn't in a recognised format. + :exc:`ValueError` -- if *name* isn't in a recognised format. :exc:`ImportError` -- if an import failed when it shouldn't have. From 8796e64525d64c3dd12497ded79a6b8cb2168392 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Thu, 13 Feb 2020 21:38:04 +0000 Subject: [PATCH 3/6] Added some cases to the test. 
--- Lib/test/test_pkgutil.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 774e1a7555ec103..25c373fa9711b46 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -210,10 +210,14 @@ def test_name_resolution(self): ('logging.handlers', logging.handlers), ('logging.handlers:', logging.handlers), ('logging.handlers:SysLogHandler', logging.handlers.SysLogHandler), + ('logging.handlers.SysLogHandler', logging.handlers.SysLogHandler), ('logging.handlers:SysLogHandler.LOG_ALERT', logging.handlers.SysLogHandler.LOG_ALERT), + ('logging.handlers.SysLogHandler.LOG_ALERT', + logging.handlers.SysLogHandler.LOG_ALERT), ('logging.handlers:NoSuchHandler', AttributeError), ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError), + ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError), ) for s, e_or_o in cases: From bdc205e0157afbbff07c20ff7f342552538da3e8 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Fri, 14 Feb 2020 12:16:17 +0000 Subject: [PATCH 4/6] =?UTF-8?q?Addressed=20=C3=89ric=20Araujo's=20review?= =?UTF-8?q?=20comments.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Doc/library/pkgutil.rst | 12 ++++++----- Lib/pkgutil.py | 5 ++--- Lib/test/test_pkgutil.py | 45 +++++++++++++++++++++++++--------------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index f144b21fdcd3006..2066cbb9fc57cee 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -231,16 +231,18 @@ support. .. function:: resolve_name(name) - Resolves a name to an object. This is used in numerous places in the stdlib - (see bpo-12915) - and also in widely-used third-party packages such as - setuptools. + Resolve a name to an object. 
+ + This functionality is used in numerous places in the standard library (see + :issue:`12915`) - and equivalent functionality is also in widely used + third-party packages such as setuptools, Django and Pyramid. It is expected that *name* will be a string in one of the following formats, where W is shorthand for a valid Python identifier and dot stands for a literal period in these pseudo-regexes: - * W(.W)* - * W(.W)*:(W(.W)*)? + * ``W(.W)*`` + * ``W(.W)*:(W(.W)*)?`` The first form is intended for backward compatibility only. It assumes that some part of the dotted name is a package, and the rest is an object diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index bd16b9984184a85..ebf7c415d2ffb06 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -639,13 +639,12 @@ def get_data(package, resource): _DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*' -_NAME_PATTERN = re.compile('^(%s)(:(%s)?)?$' % (_DOTTED_WORDS, - _DOTTED_WORDS), re.I) +_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I) del _DOTTED_WORDS def resolve_name(name): """ - Resolves a name to an object. See bpo-12915 for the background. + Resolve a name to an object. 
It is expected that `name` will be a string in one of the following formats, where W is shorthand for a valid Python identifier and dot stands diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 25c373fa9711b46..9a9fdd704a632ca 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -190,20 +190,9 @@ def test_name_resolution(self): import logging import logging.handlers - cases = ( - (None, TypeError), - (1, TypeError), - (2.0, TypeError), - (True, TypeError), - ('', ValueError), - ('?abc', ValueError), - ('abc/foo', ValueError), - ('foo', ImportError), + success_cases = ( ('os', os), ('os.path', os.path), - ('os.foo', AttributeError), - ('os.foo:', ImportError), - ('os.pth:pathsep', ImportError), ('os.path:pathsep', os.path.pathsep), ('logging', logging), ('logging:', logging), @@ -215,17 +204,39 @@ def test_name_resolution(self): logging.handlers.SysLogHandler.LOG_ALERT), ('logging.handlers.SysLogHandler.LOG_ALERT', logging.handlers.SysLogHandler.LOG_ALERT), + ('builtins.int', int), + ('builtins.int.from_bytes', int.from_bytes), + ('builtins.ZeroDivisionError', ZeroDivisionError), + ) + + failure_cases = ( + (None, TypeError), + (1, TypeError), + (2.0, TypeError), + (True, TypeError), + ('', ValueError), + ('?abc', ValueError), + ('abc/foo', ValueError), + ('foo', ImportError), + ('os.foo', AttributeError), + ('os.foo:', ImportError), + ('os.pth:pathsep', ImportError), ('logging.handlers:NoSuchHandler', AttributeError), ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError), ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError), + ('ZeroDivisionError', ImportError), ) - for s, e_or_o in cases: - try: + for s, expected in success_cases: + with self.subTest(s=s): o = pkgutil.resolve_name(s) - self.assertEqual(o, e_or_o) - except Exception as e: - self.assertIsInstance(e, e_or_o) + self.assertEqual(o, expected) + + for s, exc in failure_cases: + with self.subTest(s=s): + with self.assertRaises(exc): + 
pkgutil.resolve_name(s) + class PkgutilPEP302Tests(unittest.TestCase): From 0c922dd436cde42d4bbd336cb623ab4d6364c2bb Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Fri, 14 Feb 2020 17:33:02 +0000 Subject: [PATCH 5/6] Added more subtest cases. --- Lib/test/test_pkgutil.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index 9a9fdd704a632ca..f0d42d40a776843 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -207,6 +207,10 @@ def test_name_resolution(self): ('builtins.int', int), ('builtins.int.from_bytes', int.from_bytes), ('builtins.ZeroDivisionError', ZeroDivisionError), + ('builtins:int', int), + ('builtins:int.from_bytes', int.from_bytes), + ('builtins:ZeroDivisionError', ZeroDivisionError), + ('os:path', os.path), ) failure_cases = ( From 66d6cf1eda79b7113d4357e565529cc0fcb4c63b Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Fri, 14 Feb 2020 20:16:03 +0000 Subject: [PATCH 6/6] Minor tidy-ups based on additional review comments. 
--- Lib/pkgutil.py | 4 ++-- Lib/test/test_pkgutil.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index ebf7c415d2ffb06..4bc3083ac197eb8 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -676,7 +676,7 @@ def resolve_name(name): """ m = _NAME_PATTERN.match(name) if not m: - raise ValueError('invalid format: %r' % name) + raise ValueError(f'invalid format: {name!r}') groups = m.groups() if groups[2]: # there is a colon - a one-step import is all that's needed @@ -690,7 +690,7 @@ def resolve_name(name): mod = importlib.import_module(modname) while parts: p = parts[0] - s = '%s.%s' % (modname, p) + s = f'{modname}.{p}' try: mod = importlib.import_module(s) parts.pop(0) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index f0d42d40a776843..906150b10495bfb 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -205,10 +205,10 @@ def test_name_resolution(self): ('logging.handlers.SysLogHandler.LOG_ALERT', logging.handlers.SysLogHandler.LOG_ALERT), ('builtins.int', int), - ('builtins.int.from_bytes', int.from_bytes), - ('builtins.ZeroDivisionError', ZeroDivisionError), ('builtins:int', int), + ('builtins.int.from_bytes', int.from_bytes), ('builtins:int.from_bytes', int.from_bytes), + ('builtins.ZeroDivisionError', ZeroDivisionError), ('builtins:ZeroDivisionError', ZeroDivisionError), ('os:path', os.path), )