# This file is part of the pycalver project
# https://github.com/mbarkhau/pycalver
#
# Copyright (c) 2018-2020 Manuel Barkhau (mbarkhau@gmail.com) - MIT License
# SPDX-License-Identifier: MIT
"""Compose Regular Expressions from Patterns.

>>> pattern = compile_pattern("vYYYY0M.BUILD[-RELEASE]")
>>> version_info = pattern.regexp.match("v201712.0123-alpha")
>>> assert version_info.groupdict() == {
...     "year_y" : "2017",
...     "month"  : "12",
...     "bid"    : "0123",
...     "tag"    : "alpha",
... }
>>>
>>> version_info = pattern.regexp.match("201712.1234")
>>> assert version_info is None

>>> version_info = pattern.regexp.match("v201713.1234")
>>> assert version_info is None

>>> version_info = pattern.regexp.match("v201712.1234")
>>> assert version_info.groupdict() == {
...     "year_y" : "2017",
...     "month"  : "12",
...     "bid"    : "1234",
...     "tag"    : None,
... }
"""

import re
import typing as typ
import logging

from . import utils
from .patterns import RE_PATTERN_ESCAPES
from .patterns import Pattern

logger = logging.getLogger("pycalver.v2patterns")

# NOTE (mb 2020-09-17): For patterns with different options '(AAA|BB|C)', the
#   patterns with more digits should be first/left of those with fewer digits:
#
#     good: (?:1[0-2]|[1-9])
#     bad:  (?:[1-9]|1[0-2])
#
#   This ensures that the longest match is done for a pattern.
#
#   This implies that patterns for smaller numbers sometimes must be right of
#   those for larger numbers. To be consistent we use this ordering not
#   sometimes but always (even though in theory it wouldn't matter):
#
#     good: (?:3[0-1]|[1-2][0-9]|[1-9])
#     bad:  (?:[1-2][0-9]|3[0-1]|[1-9])


# Maps each pattern part name to the regular expression fragment that
# matches its textual representation.
PART_PATTERNS = {
    # Based on calver.org
    'YYYY': r"[1-9][0-9]{3}",
    'YY'  : r"[1-9][0-9]?",
    '0Y'  : r"[0-9]{2}",
    'GGGG': r"[1-9][0-9]{3}",
    'GG'  : r"[1-9][0-9]?",
    '0G'  : r"[0-9]{2}",
    'Q'   : r"[1-4]",
    'MM'  : r"1[0-2]|[1-9]",
    '0M'  : r"1[0-2]|0[1-9]",
    'DD'  : r"3[0-1]|[1-2][0-9]|[1-9]",
    '0D'  : r"3[0-1]|[1-2][0-9]|0[1-9]",
    'JJJ' : r"36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|[1-9][0-9]|[1-9]",
    '00J' : r"36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|0[1-9][0-9]|00[1-9]",
    # week numbering parts
    'WW': r"5[0-2]|[1-4][0-9]|[0-9]",
    '0W': r"5[0-2]|[0-4][0-9]",
    'UU': r"5[0-2]|[1-4][0-9]|[0-9]",
    '0U': r"5[0-2]|[0-4][0-9]",
    'VV': r"5[0-3]|[1-4][0-9]|[1-9]",
    '0V': r"5[0-3]|[1-4][0-9]|0[1-9]",
    # non calver parts
    'MAJOR'  : r"[0-9]+",
    'MINOR'  : r"[0-9]+",
    'PATCH'  : r"[0-9]+",
    'BUILD'  : r"[0-9]+",
    'BLD'    : r"[1-9][0-9]*",
    'RELEASE': r"preview|final|alpha|beta|post|rc",
    'PYTAG'  : r"post|rc|a|b",
    'NUM'    : r"[0-9]+",
}


# Maps each pattern part name to the name of the regex capture group
# (field) that is generated for it by _replace_pattern_parts.
PATTERN_PART_FIELDS = {
    'YYYY'   : 'year_y',
    'YY'     : 'year_y',
    '0Y'     : 'year_y',
    'GGGG'   : 'year_g',
    'GG'     : 'year_g',
    '0G'     : 'year_g',
    'Q'      : 'quarter',
    'MM'     : 'month',
    '0M'     : 'month',
    'DD'     : 'dom',
    '0D'     : 'dom',
    'JJJ'    : 'doy',
    '00J'    : 'doy',
    'MAJOR'  : 'major',
    'MINOR'  : 'minor',
    'PATCH'  : 'patch',
    'BUILD'  : 'bid',
    'BLD'    : 'bid',
    'RELEASE': 'tag',
    'PYTAG'  : 'pytag',
    'NUM'    : 'num',
    'WW'     : 'week_w',
    '0W'     : 'week_w',
    'UU'     : 'week_u',
    '0U'     : 'week_u',
    'VV'     : 'week_v',
    '0V'     : 'week_v',
}


# Part replacements applied by _convert_to_pep440 when deriving a
# PEP440 style pattern from a version pattern.
PEP440_PART_SUBSTITUTIONS = {
    '0W'     : "WW",
    '0U'     : "UU",
    '0V'     : "VV",
    '0M'     : "MM",
    '0D'     : "DD",
    '00J'    : "JJJ",
    'BUILD'  : "BLD",
    'RELEASE': "PYTAG",
}


FieldValue = typ.Union[str, int]


def _fmt_num(val: FieldValue) -> str:
    """Format val unchanged, via str()."""
    return str(val)


def _fmt_bld(val: FieldValue) -> str:
    """Format val as an integer, which drops any leading zeros."""
    return str(int(val))


def _fmt_yy(year_y: FieldValue) -> str:
    """Format the last two digits of year_y without zero padding."""
    return str(int(str(year_y)[-2:]))


def _fmt_0y(year_y: FieldValue) -> str:
    """Format the last two digits of year_y, zero padded to width 2."""
    return "{0:02}".format(int(str(year_y)[-2:]))


def _fmt_gg(year_g: FieldValue) -> str:
    """Format the last two digits of year_g without zero padding."""
    return str(int(str(year_g)[-2:]))


def _fmt_0g(year_g: FieldValue) -> str:
    """Format the last two digits of year_g, zero padded to width 2."""
    return "{0:02}".format(int(str(year_g)[-2:]))


def _fmt_0m(month: FieldValue) -> str:
    """Format month zero padded to width 2."""
    return "{0:02}".format(int(month))


def _fmt_0d(dom: FieldValue) -> str:
    """Format dom (day of month) zero padded to width 2."""
    return "{0:02}".format(int(dom))


def _fmt_00j(doy: FieldValue) -> str:
    """Format doy (day of year) zero padded to width 3."""
    return "{0:03}".format(int(doy))


def _fmt_0w(week_w: FieldValue) -> str:
    """Format week_w zero padded to width 2."""
    return "{0:02}".format(int(week_w))


def _fmt_0u(week_u: FieldValue) -> str:
    """Format week_u zero padded to width 2."""
    return "{0:02}".format(int(week_u))


def _fmt_0v(week_v: FieldValue) -> str:
    """Format week_v zero padded to width 2."""
    return "{0:02}".format(int(week_v))


# Maps each pattern part name to the function used to render a field
# value as the text of that part.
PART_FORMATS: typ.Dict[str, typ.Callable[[FieldValue], str]] = {
    'YYYY'   : _fmt_num,
    'YY'     : _fmt_yy,
    '0Y'     : _fmt_0y,
    'GGGG'   : _fmt_num,
    'GG'     : _fmt_gg,
    '0G'     : _fmt_0g,
    'Q'      : _fmt_num,
    'MM'     : _fmt_num,
    '0M'     : _fmt_0m,
    'DD'     : _fmt_num,
    '0D'     : _fmt_0d,
    'JJJ'    : _fmt_num,
    '00J'    : _fmt_00j,
    'MAJOR'  : _fmt_num,
    'MINOR'  : _fmt_num,
    'PATCH'  : _fmt_num,
    'BUILD'  : _fmt_num,
    'BLD'    : _fmt_bld,
    'RELEASE': _fmt_num,
    'PYTAG'  : _fmt_num,
    'NUM'    : _fmt_num,
    'WW'     : _fmt_num,
    '0W'     : _fmt_0w,
    'UU'     : _fmt_num,
    '0U'     : _fmt_0u,
    'VV'     : _fmt_num,
    '0V'     : _fmt_0v,
}


def _convert_to_pep440(version_pattern: str) -> str:
    """Derive a PEP440 style pattern from version_pattern.

    The leading "v" and all characters other than letters, digits,
    "." and "[" / "]" are removed. Parts listed in
    PEP440_PART_SUBSTITUTIONS are replaced, where numerical parts are
    only replaced if they are at the start of the pattern or directly
    preceded by a ".". Unless the result contains "PYTAGNUM", any
    "PYTAG"/"NUM" parts are dropped and "[PYTAGNUM]" is appended.
    """
    # NOTE (mb 2020-09-20): This does not support some
    #   corner cases as specified in PEP440, in particular
    #   related to post and dev releases.

    pep440_pattern = version_pattern

    if pep440_pattern.startswith("v"):
        pep440_pattern = pep440_pattern[1:]

    # escaped (literal) brackets are dropped entirely
    pep440_pattern = pep440_pattern.replace(r"\[", "")
    pep440_pattern = pep440_pattern.replace(r"\]", "")

    pep440_pattern, _ = re.subn(r"[^a-zA-Z0-9\.\[\]]", "", pep440_pattern)

    # Longer part names are processed first (sorted() is stable, so
    # equally long names keep their order from PATTERN_PART_FIELDS).
    part_names = sorted(PATTERN_PART_FIELDS, key=len, reverse=True)

    for part_name in part_names:
        if part_name not in version_pattern:
            continue
        if part_name not in PEP440_PART_SUBSTITUTIONS:
            continue

        substitution = PEP440_PART_SUBSTITUTIONS[part_name]

        is_numerical_part = part_name not in ('RELEASE', 'PYTAG')
        if is_numerical_part:
            part_index = pep440_pattern.find(part_name)
            if part_index < 0:
                # Nothing to replace. Skipping also avoids indexing the
                # pattern with a negative offset below.
                continue

            # Only the first occurrence is inspected to decide if the
            # zero padding of the part is truncated.
            is_zero_truncation_part = part_index == 0 or pep440_pattern[part_index - 1] == "."
            if is_zero_truncation_part:
                pep440_pattern = pep440_pattern.replace(part_name, substitution)
        else:
            pep440_pattern = pep440_pattern.replace(part_name, substitution)

    # PYTAG and NUM must be adjacent and also be the last (optional) part
    if 'PYTAGNUM' not in pep440_pattern:
        pep440_pattern = pep440_pattern.replace("PYTAG", "")
        pep440_pattern = pep440_pattern.replace("NUM", "")
        pep440_pattern = pep440_pattern.replace("[]", "")
        pep440_pattern += "[PYTAGNUM]"

    return pep440_pattern


def normalize_pattern(version_pattern: str, raw_pattern: str) -> str:
    """Expand the placeholders of raw_pattern.

    "{version}" is replaced by version_pattern and "{pep440_version}"
    is replaced by the PEP440 style pattern derived from
    version_pattern. A raw_pattern without placeholders is returned
    unchanged.
    """
    normalized_pattern = raw_pattern
    if "{version}" in raw_pattern:
        normalized_pattern = normalized_pattern.replace("{version}", version_pattern)

    if "{pep440_version}" in normalized_pattern:
        pep440_version_pattern = _convert_to_pep440(version_pattern)
        normalized_pattern = normalized_pattern.replace("{pep440_version}", pep440_version_pattern)

    return normalized_pattern


def _replace_pattern_parts(pattern: str) -> str:
    """Convert an escaped pattern to the source of a regular expression.

    Unescaped "[" and "]" are turned into optional non-capturing groups
    "(?:" and ")?". The first occurrence of each part name from
    PART_PATTERNS is replaced by a named group "(?P<field>...)".
    """
    # The pattern is escaped, so that everything besides the format
    # string variables is treated literally.

    # Each substitution also consumes the character before the bracket,
    # so directly adjacent brackets (e.g. "[[") are not all matched in
    # one pass. Repeat until there is nothing left to substitute.
    while True:
        new_pattern, _n = re.subn(r"([^\\]|^)\[", r"\1(?:", pattern)
        new_pattern, _m = re.subn(r"([^\\]|^)\]", r"\1)?", new_pattern)
        pattern = new_pattern
        if _n + _m == 0:
            break

    SortKey = typ.Tuple[int, int]
    PositionedPart = typ.Tuple[int, int, str]
    part_patterns_by_index: typ.Dict[SortKey, PositionedPart] = {}
    for part_name, part_pattern in PART_PATTERNS.items():
        start_idx = pattern.find(part_name)
        if start_idx >= 0:
            field = PATTERN_PART_FIELDS[part_name]
            named_part_pattern = f"(?P<{field}>{part_pattern})"
            end_idx = start_idx + len(part_name)
            sort_key = (-end_idx, -len(part_name))
            part_patterns_by_index[sort_key] = (start_idx, end_idx, named_part_pattern)

    # NOTE (mb 2020-09-17): The sorting is done so that we process items:
    #   - right before left
    #   - longer before shorter
    last_start_idx = len(pattern) + 1
    result_pattern = pattern
    for _, (start_idx, end_idx, named_part_pattern) in sorted(part_patterns_by_index.items()):
        # Skip parts that overlap with a part that was already replaced
        # (e.g. the 'YY' found inside of 'YYYY'). Replacing from right
        # to left keeps the indexes of the remaining parts valid.
        if end_idx <= last_start_idx:
            result_pattern = (
                result_pattern[:start_idx] + named_part_pattern + result_pattern[end_idx:]
            )
            last_start_idx = start_idx

    return result_pattern


def _compile_pattern_re(normalized_pattern: str) -> typ.Pattern[str]:
    """Compile normalized_pattern to a regular expression.

    Characters listed in RE_PATTERN_ESCAPES (other than "[", "]" and
    the backslash) are escaped before the parts of the pattern are
    replaced by their regular expressions.
    """
    escaped_pattern = normalized_pattern
    for char, escaped in RE_PATTERN_ESCAPES:
        # [] braces are used for optional parts, such as [-RELEASE]/[-beta]
        # and need to be escaped manually.
        is_semantic_char = char in "[]\\"
        if not is_semantic_char:
            # escape it so it is a literal in the re pattern
            escaped_pattern = escaped_pattern.replace(char, escaped)

    pattern_str = _replace_pattern_parts(escaped_pattern)
    return re.compile(pattern_str)


@utils.memo
def compile_pattern(version_pattern: str, raw_pattern: typ.Optional[str] = None) -> Pattern:
    """Create a Pattern for raw_pattern.

    If raw_pattern is None, version_pattern itself is used as the raw
    pattern. The result holds the version pattern, the normalized
    pattern and the regular expression compiled from it.
    """
    _raw_pattern = version_pattern if raw_pattern is None else raw_pattern
    normalized_pattern = normalize_pattern(version_pattern, _raw_pattern)
    regexp = _compile_pattern_re(normalized_pattern)
    return Pattern(version_pattern, normalized_pattern, regexp)


def compile_patterns(version_pattern: str, raw_patterns: typ.List[str]) -> typ.List[Pattern]:
    """Create a Pattern for each of raw_patterns, in the given order."""
    return [compile_pattern(version_pattern, raw_pattern) for raw_pattern in raw_patterns]