add v2 parsing

This commit is contained in:
Manuel Barkhau 2020-09-17 23:45:25 +00:00
parent d4bd8a5931
commit 5940fdbc40
3 changed files with 435 additions and 322 deletions

View file

@ -3,30 +3,33 @@
#
# Copyright (c) 2018-2020 Manuel Barkhau (mbarkhau@gmail.com) - MIT License
# SPDX-License-Identifier: MIT
# """Compose Regular Expressions from Patterns.
"""Compose Regular Expressions from Patterns.
# >>> pattern = compile_pattern("vYYYY0M.BUILD[-TAG]")
# >>> version_info = pattern.regexp.match("v201712.0123-alpha")
# >>> assert version_info == {
# ... "version": "v201712.0123-alpha",
# ... "YYYY" : "2017",
# ... "0M" : "12",
# ... "BUILD" : "0123",
# ... "TAG" : "alpha",
# ... }
# >>>
# >>> version_info = pattern.regexp.match("201712.1234")
# >>> assert version_info is None
>>> pattern = compile_pattern("vYYYY0M.BUILD[-TAG]")
>>> version_info = pattern.regexp.match("v201712.0123-alpha")
>>> assert version_info.groupdict() == {
... "version": "v201712.0123-alpha",
... "year_y" : "2017",
... "month" : "12",
... "bid" : "0123",
... "tag" : "alpha",
... }
>>>
>>> version_info = pattern.regexp.match("201712.1234")
>>> assert version_info is None
# >>> version_info = pattern.regexp.match("v201712.1234")
# >>> assert version_info == {
# ... "version": "v201712.0123-alpha",
# ... "YYYY" : "2017",
# ... "0M" : "12",
# ... "BUILD" : "0123",
# ... "TAG" : None,
# ... }
# """
>>> version_info = pattern.regexp.match("v201713.1234")
>>> assert version_info is None
>>> version_info = pattern.regexp.match("v201712.1234")
>>> assert version_info.groupdict() == {
... "version": "v201712.1234",
... "year_y" : "2017",
... "month" : "12",
... "bid" : "1234",
... "tag" : None,
... }
"""
import re
import typing as typ
@ -42,43 +45,52 @@ PATTERN_ESCAPES = [
("?" , "\u005c?"),
("{" , "\u005c{"),
("}" , "\u005c}"),
("[" , "\u005c["),
("]" , "\u005c]"),
("(" , "\u005c("),
(")" , "\u005c)"),
# ("[" , "\u005c["), # [braces] are used for optional parts
# ("]" , "\u005c]"),
("(", "\u005c("),
(")", "\u005c)"),
]
# NOTE (mb 2020-09-17): For patterns with different options, the longer
# patterns should be first/left (e.g. for 'MM', `1[0-2]` before `[1-9]`).
# This ensures that the longest match is done rather than the shortest.
# To have a consistent ordering, we always put the pattern that matches
# the larger number first (even if the patterns would otherwise be the
# same size).
PART_PATTERNS = {
# Based on calver.org
'YYYY': r"[1-9][0-9]{3}",
'YY' : r"[1-9][0-9]?",
'0Y' : r"[0-9]{2}",
'Q' : r"[1-4]",
'MM' : r"(?:[1-9]|1[0-2])",
'0M' : r"(?:0[1-9]|1[0-2])",
'DD' : r"(?:[1-9]|[1-2][0-9]|3[0-1])",
'0D' : r"(?:0[1-9]|[1-2][0-9]|3[0-1])",
'JJJ' : r"(?:[1-9]|[1-9][0-9]|[1-2][0-9][0-9]|3[0-5][0-9]|36[0-6])",
'00J' : r"(?:00[1-9]|0[1-9][0-9]|[1-2][0-9][0-9]|3[0-5][0-9]|36[0-6])",
# week numbering parts
'WW' : r"(?:[0-9]|[1-4][0-9]|5[0-2])",
'0W' : r"(?:[0-4][0-9]|5[0-2])",
'UU' : r"(?:[0-9]|[1-4][0-9]|5[0-2])",
'0U' : r"(?:[0-4][0-9]|5[0-2])",
'VV' : r"(?:[1-9]|[1-4][0-9]|5[0-3])",
'0V' : r"(?:0[1-9]|[1-4][0-9]|5[0-3])",
'GGGG': r"[1-9][0-9]{3}",
'GG' : r"[1-9][0-9]?",
'0G' : r"[0-9]{2}",
'Q' : r"[1-4]",
'MM' : r"(?:1[0-2]|[1-9])",
'0M' : r"(?:1[0-2]|0[1-9])",
'DD' : r"(?:3[0-1]|[1-2][0-9]|[1-9])",
'0D' : r"(?:3[0-1]|[1-2][0-9]|0[1-9])",
'JJJ' : r"(?:36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|[1-9][0-9]|[1-9])",
'00J' : r"(?:36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|0[1-9][0-9]|00[1-9])",
# week numbering parts
'WW': r"(?:5[0-2]|[1-4][0-9]|[0-9])",
'0W': r"(?:5[0-2]|[0-4][0-9])",
'UU': r"(?:5[0-2]|[1-4][0-9]|[0-9])",
'0U': r"(?:5[0-2]|[0-4][0-9])",
'VV': r"(?:5[0-3]|[1-4][0-9]|[1-9])",
'0V': r"(?:5[0-3]|[1-4][0-9]|0[1-9])",
# non calver parts
'MAJOR': r"[0-9]+",
'MINOR': r"[0-9]+",
'PATCH': r"[0-9]+",
'MICRO': r"[0-9]+",
'BUILD': r"[0-9]+",
'TAG' : r"(?:alpha|beta|dev|rc|post|final)",
'PYTAG': r"(?:a|b|dev|rc|post)?[0-9]*",
'TAG' : r"(?:alpha|beta|dev|pre|rc|post|final)",
'PYTAG': r"(?:a|b|dev|rc|post)",
'NUM' : r"[0-9]+",
}
PATTERN_PART_FIELDS = {
'YYYY' : 'year_y',
@ -109,17 +121,118 @@ PATTERN_PART_FIELDS = {
'VV' : 'week_v',
'0V' : 'week_v',
}
FieldValue = typ.Union[str, int]
def _fmt_num(val: FieldValue) -> str:
return str(val)
def _fmt_yy(year_y: FieldValue) -> str:
return str(int(str(year_y)[-2:]))
def _fmt_0y(year_y: FieldValue) -> str:
return "{0:02}".format(int(str(year_y)[-2:]))
def _fmt_gg(year_g: FieldValue) -> str:
return str(int(str(year_g)[-2:]))
def _fmt_0g(year_g: FieldValue) -> str:
return "{0:02}".format(int(str(year_g)[-2:]))
def _fmt_0m(month: FieldValue) -> str:
return "{0:02}".format(int(month))
def _fmt_0d(dom: FieldValue) -> str:
return "{0:02}".format(int(dom))
def _fmt_00j(doy: FieldValue) -> str:
return "{0:03}".format(int(doy))
def _fmt_0w(week_w: FieldValue) -> str:
return "{0:02}".format(int(week_w))
def _fmt_0u(week_u: FieldValue) -> str:
return "{0:02}".format(int(week_u))
def _fmt_0v(week_v: FieldValue) -> str:
return "{0:02}".format(int(week_v))
PART_FORMATS: typ.Dict[str, typ.Callable[[FieldValue], str]] = {
'YYYY' : _fmt_num,
'YY' : _fmt_yy,
'0Y' : _fmt_0y,
'GGGG' : _fmt_num,
'GG' : _fmt_gg,
'0G' : _fmt_0g,
'Q' : _fmt_num,
'MM' : _fmt_num,
'0M' : _fmt_0m,
'DD' : _fmt_num,
'0D' : _fmt_0d,
'JJJ' : _fmt_num,
'00J' : _fmt_00j,
'MAJOR': _fmt_num,
'MINOR': _fmt_num,
'PATCH': _fmt_num,
'MICRO': _fmt_num,
'BUILD': _fmt_num,
'TAG' : _fmt_num,
'PYTAG': _fmt_num,
'NUM' : _fmt_num,
'WW' : _fmt_num,
'0W' : _fmt_0w,
'UU' : _fmt_num,
'0U' : _fmt_0u,
'VV' : _fmt_num,
'0V' : _fmt_0v,
}
def _replace_pattern_parts(pattern: str) -> str:
# The pattern is escaped, so that everything besides the format
# string variables is treated literally.
if "[" in pattern and "]" in pattern:
pattern = pattern.replace("[", "(?:")
pattern = pattern.replace("]", ")?")
part_patterns_by_index: typ.Dict[typ.Tuple[int, int], typ.Tuple[int, int, str]] = {}
for part_name, part_pattern in PART_PATTERNS.items():
named_part_pattern = f"(?P<{part_name}>{part_pattern})"
placeholder = "\u005c{" + part_name + "\u005c}"
pattern = pattern.replace(placeholder, named_part_pattern)
return pattern
start_idx = pattern.find(part_name)
if start_idx < 0:
continue
field = PATTERN_PART_FIELDS[part_name]
named_part_pattern = f"(?P<{field}>{part_pattern})"
end_idx = start_idx + len(part_name)
sort_key = (-end_idx, -len(part_name))
part_patterns_by_index[sort_key] = (start_idx, end_idx, named_part_pattern)
# NOTE (mb 2020-09-17): The sorting is done so that we process items:
# - right before left
# - longer before shorter
last_start_idx = len(pattern) + 1
result_pattern = pattern
for _, (start_idx, end_idx, named_part_pattern) in sorted(part_patterns_by_index.items()):
if end_idx <= last_start_idx:
result_pattern = (
result_pattern[:start_idx] + named_part_pattern + result_pattern[end_idx:]
)
last_start_idx = start_idx
return "(?P<version>" + result_pattern + ")"
def compile_pattern_str(pattern: str) -> str: