Add more flexible parsing and formatting

This commit is contained in:
Manuel Barkhau 2019-01-06 14:38:20 +01:00
parent 32447b03d4
commit 9eda61d95b
13 changed files with 932 additions and 359 deletions

View file

@ -5,50 +5,14 @@
# SPDX-License-Identifier: MIT
"""Parse PyCalVer strings from files."""
import re
import logging
import typing as typ
from . import patterns
log = logging.getLogger("pycalver.parse")
# Release tag names. The same names appear in the 'release' and
# 'release_tag' matchers of RE_PATTERN_PARTS below.
VALID_RELEASE_VALUES = ("alpha", "beta", "dev", "rc", "post", "final")
# (character, replacement) pairs that compile_pattern applies, in this
# order, to a raw pattern via str.replace. The backslash pair has to stay
# first: every later replacement inserts a backslash, which would
# otherwise be escaped a second time. Braces are doubled because the
# escaped pattern is subsequently passed through str.format.
# NOTE(review): "?", "|", "^" and "$" are not in this list, so they keep
# their regex meaning inside a pattern - confirm whether that is intended.
PATTERN_ESCAPES = [
("\u005c", "\u005c\u005c"),
("-" , "\u005c-"),
("." , "\u005c."),
("+" , "\u005c+"),
("*" , "\u005c*"),
("{" , "\u005c{{"),
("}" , "\u005c}}"),
("[" , "\u005c["),
("]" , "\u005c]"),
("(" , "\u005c("),
(")" , "\u005c)"),
]
# NOTE (mb 2018-09-03): These are matchers for parts, which are
# used in the patterns, they're not for validation. This means
# that they may find strings, which are not valid pycalver
# strings, when parsed in their full context. For such cases,
# the patterns should be expanded.
# Maps each part name usable as a "{name}" placeholder in a pattern to the
# regular expression that compile_pattern substitutes for it.
RE_PATTERN_PARTS = {
'pep440_version': r"\d{6}\.[1-9]\d*(a|b|dev|rc|post)?\d*",
'version' : r"v\d{6}\.\d{4,}(\-(alpha|beta|dev|rc|post|final))?",
'calver' : r"v\d{6}",
'year' : r"\d{4}",
'month' : r"\d{2}",
'build' : r"\.\d{4,}",
'build_no' : r"\d{4,}",
'release' : r"(\-(alpha|beta|dev|rc|post|final))?",
'release_tag' : r"(alpha|beta|dev|rc|post|final)?",
}
class PatternMatch(typ.NamedTuple):
"""Container to mark a version string in a file."""
@ -62,26 +26,10 @@ class PatternMatch(typ.NamedTuple):
# Alias for an iterable of PatternMatch items; used as the return
# annotation of _iter_for_pattern and iter_matches.
PatternMatches = typ.Iterable[PatternMatch]
def compile_pattern(pattern: str) -> typ.Pattern[str]:
    """Compile a pattern with "{part}" placeholders to a regular expression.

    Every character listed in PATTERN_ESCAPES is escaped so that it is
    matched literally. Placeholders whose name is a key of
    RE_PATTERN_PARTS are then restored and replaced by the regular
    expression registered for that part.

    Args:
        pattern: Pattern string, e.g. one containing "{version}".

    Returns:
        The compiled regular expression.
    """
    escaped = pattern
    # Order matters: PATTERN_ESCAPES lists the backslash pair first.
    for raw_char, replacement in PATTERN_ESCAPES:
        escaped = escaped.replace(raw_char, replacement)

    # Undo the brace escaping, but only for known part names, so that
    # str.format below sees them as replacement fields again.
    for name in RE_PATTERN_PARTS:
        escaped_field = "\u005c{{" + name + "\u005c}}"
        escaped = escaped.replace(escaped_field, "{" + name + "}")

    regex_source = escaped.format(**RE_PATTERN_PARTS)
    return re.compile(regex_source)
def _iter_for_pattern(lines: typ.List[str], pattern: str) -> PatternMatches:
# The pattern is escaped, so that everything besides the format
# string variables is treated literally.
pattern_re = compile_pattern(pattern)
pattern_re = patterns.compile_pattern(pattern)
for lineno, line in enumerate(lines):
match = pattern_re.search(line)
@ -93,12 +41,12 @@ def iter_matches(lines: typ.List[str], patterns: typ.List[str]) -> PatternMatche
"""Iterate over all matches of any pattern on any line.
>>> lines = ["__version__ = 'v201712.0002-alpha'"]
>>> patterns = ["{version}", "{pep440_version}"]
>>> patterns = ["{pycalver}", "{pep440_pycalver}"]
>>> matches = list(iter_matches(lines, patterns))
>>> assert matches[0] == PatternMatch(
... lineno = 0,
... line = "__version__ = 'v201712.0002-alpha'",
... pattern= "{version}",
... pattern= "{pycalver}",
... span = (15, 33),
... match = "v201712.0002-alpha",
... )