From 5806de1ebe998ccba452c6aa6824d9b6ada8f4ca Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 29 Mar 2021 20:43:53 -0400 Subject: [PATCH 1/4] Unify section parser behavior. Removes reliance on regex (ref #292). Fixes #295. --- importlib_metadata/__init__.py | 72 ++++++++++++++-------------------- 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index c83e3e9d..b545e1c2 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -65,20 +65,25 @@ class Sectioned: A simple entry point config parser for performance >>> res = Sectioned.get_sections(Sectioned._sample) - >>> sec, values = next(res) + >>> sec, pair = next(res) >>> sec 'sec1' - >>> [(key, value) for key, value in values] - [('a', '1'), ('b', '2')] - >>> sec, values = next(res) + >>> tuple(pair) + ('a', '1') + >>> sec, pair = next(res) + >>> tuple(pair) + ('b', '2') + >>> sec, pair = next(res) >>> sec 'sec2' - >>> [(key, value) for key, value in values] - [('a', '2')] + >>> tuple(pair) + ('a', '2') >>> list(res) [] """ + Pair = collections.namedtuple('Pair', 'name value') + _sample = textwrap.dedent( """ [sec1] @@ -91,25 +96,25 @@ class Sectioned: """ ).lstrip() - def __init__(self): - self.section = None - - def __call__(self, line): - if line.startswith('[') and line.endswith(']'): - # new section - self.section = line.strip('[]') - return - return self.section - @classmethod def get_sections(cls, text): - lines = filter(cls.valid, map(str.strip, text.splitlines())) return ( - (section, map(cls.parse_value, values)) - for section, values in itertools.groupby(lines, cls()) - if section is not None + (section.name, cls.parse_value(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None ) + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Sectioned.Pair(name, value) + @staticmethod def valid(line): return line and not line.startswith('#') @@ -256,8 +261,7 @@ def _from_text(cls, text): def _parse_groups(text): return ( (name, value, section) - for section, values in Sectioned.get_sections(text) - for name, value in values + for section, (name, value) in Sectioned.get_sections(text) ) @@ -573,24 +577,7 @@ def _read_egg_info_reqs(self): @classmethod def _deps_from_requires_text(cls, source): - section_pairs = cls._read_sections(source.splitlines()) - sections = { - section: list(map(operator.itemgetter('line'), results)) - for section, results in itertools.groupby( - section_pairs, operator.itemgetter('section') - ) - } - return cls._convert_egg_info_reqs_to_simple_reqs(sections) - - @staticmethod - def _read_sections(lines): - section = None - for line in filter(None, lines): - section_match = re.match(r'\[(.*)\]$', line) - if section_match: - section = section_match.group(1) - continue - yield locals() + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) @staticmethod def _convert_egg_info_reqs_to_simple_reqs(sections): @@ -615,9 +602,8 @@ def parse_condition(section): conditions = list(filter(None, [markers, make_condition(extra)])) return '; ' + ' and '.join(conditions) if conditions else '' - for section, deps in sections.items(): - for dep in deps: - yield dep + parse_condition(section) + for section in sections: + yield section.value + parse_condition(section.name) class DistributionFinder(MetaPathFinder): From bf6a3b13b3eec27e48abed353761a29676d6a0ff Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 29 Mar 2021 21:11:53 -0400 Subject: [PATCH 2/4] Use Pair in other places and extract it to _collections. --- importlib_metadata/__init__.py | 36 +++++++++++++----------------- importlib_metadata/_collections.py | 6 +++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index b545e1c2..5abc1e9e 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -15,7 +15,7 @@ import contextlib import collections -from ._collections import FreezableDefaultDict +from ._collections import FreezableDefaultDict, Pair from ._compat import ( NullFinder, Protocol, @@ -64,26 +64,24 @@ class Sectioned: """ A simple entry point config parser for performance - >>> res = Sectioned.get_sections(Sectioned._sample) - >>> sec, pair = next(res) - >>> sec + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name 'sec1' - >>> tuple(pair) + >>> tuple(item.value) ('a', '1') - >>> sec, pair = next(res) - >>> tuple(pair) + >>> item = next(res) + >>> tuple(item.value) ('b', '2') - >>> sec, pair = next(res) - >>> sec + >>> item = next(res) + >>> item.name 'sec2' - >>> tuple(pair) + >>> tuple(item.value) ('a', '2') >>> list(res) [] """ - Pair = collections.namedtuple('Pair', 'name value') - _sample = textwrap.dedent( """ [sec1] @@ -97,9 +95,9 @@ class Sectioned: ).lstrip() @classmethod - def get_sections(cls, text): + def section_pairs(cls, text): return ( - (section.name, cls.parse_value(section.value)) + section._replace(value=Pair.parse(section.value)) for section in cls.read(text, filter_=cls.valid) if section.name is not None ) @@ -113,16 +111,12 @@ def read(text, filter_=None): if section_match: name = value.strip('[]') continue - yield Sectioned.Pair(name, value) + yield Pair(name, value) @staticmethod def valid(line): return line and not line.startswith('#') - @staticmethod - def parse_value(line): - return map(str.strip, line.split("=", 1)) - class EntryPoint( PyPy_repr, collections.namedtuple('EntryPointBase', 'name value group') @@ -260,8 +254,8 @@ def _from_text(cls, text): @staticmethod def _parse_groups(text): return ( - (name, value, section) - for section, (name, value) in Sectioned.get_sections(text) + (item.value.name, item.value.value, item.name) + for item in Sectioned.section_pairs(text) ) diff --git a/importlib_metadata/_collections.py b/importlib_metadata/_collections.py index 6aa17c84..cf0954e1 100644 --- a/importlib_metadata/_collections.py +++ b/importlib_metadata/_collections.py @@ -22,3 +22,9 @@ def __missing__(self, key): def freeze(self): self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) From c891d5065be9948bc593808884b85c3ad364dbfd Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 29 Mar 2021 21:26:51 -0400 Subject: [PATCH 3/4] Expand and simplify Sectioned doctest. --- importlib_metadata/__init__.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/importlib_metadata/__init__.py b/importlib_metadata/__init__.py index 5abc1e9e..7c5eb2c7 100644 --- a/importlib_metadata/__init__.py +++ b/importlib_metadata/__init__.py @@ -64,20 +64,27 @@ class Sectioned: """ A simple entry point config parser for performance + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + >>> res = Sectioned.section_pairs(Sectioned._sample) >>> item = next(res) >>> item.name 'sec1' - >>> tuple(item.value) - ('a', '1') + >>> item.value + Pair(name='a', value='1') >>> item = next(res) - >>> tuple(item.value) - ('b', '2') + >>> item.value + Pair(name='b', value='2') >>> item = next(res) >>> item.name 'sec2' - >>> tuple(item.value) - ('a', '2') + >>> item.value + Pair(name='a', value='2') >>> list(res) [] """ From 244fc482479925b75851194989518d97061a1b28 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 29 Mar 2021 21:36:36 -0400 Subject: [PATCH 4/4] Update changelog. --- CHANGES.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 8427a258..696da526 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,8 @@ +v3.10.0 +======= + +* #295: Internal refactoring to unify section parsing logic. + v3.9.1 ======