bumpver/src/pycalver2/patterns.py

249 lines
6.7 KiB
Python
Raw Normal View History

2020-09-06 20:20:36 +00:00
# This file is part of the pycalver project
# https://github.com/mbarkhau/pycalver
#
# Copyright (c) 2018-2020 Manuel Barkhau (mbarkhau@gmail.com) - MIT License
# SPDX-License-Identifier: MIT
2020-09-17 23:45:25 +00:00
"""Compose Regular Expressions from Patterns.
>>> pattern = compile_pattern("vYYYY0M.BUILD[-TAG]")
>>> version_info = pattern.regexp.match("v201712.0123-alpha")
>>> assert version_info.groupdict() == {
... "version": "v201712.0123-alpha",
... "year_y" : "2017",
... "month" : "12",
... "bid" : "0123",
... "tag" : "alpha",
... }
>>>
>>> version_info = pattern.regexp.match("201712.1234")
>>> assert version_info is None
>>> version_info = pattern.regexp.match("v201713.1234")
>>> assert version_info is None
>>> version_info = pattern.regexp.match("v201712.1234")
>>> assert version_info.groupdict() == {
... "version": "v201712.1234",
... "year_y" : "2017",
... "month" : "12",
... "bid" : "1234",
... "tag" : None,
... }
"""
2020-09-06 20:20:36 +00:00
import re
import typing as typ
2020-09-08 20:59:52 +00:00
import pycalver.patterns as v1patterns
2020-09-06 20:20:36 +00:00
# Pairs of (literal character, regex-escaped replacement), applied in order
# by compile_pattern_str. The backslash comes first so that the backslashes
# introduced by the later replacements are not escaped a second time.
#
# "[" and "]" are intentionally absent: [brackets] mark optional parts of a
# version pattern and are translated separately.
PATTERN_ESCAPES = [(char, "\\" + char) for char in "\\-.+*?{}()"]
2020-09-17 23:45:25 +00:00
# NOTE (mb 2020-09-17): For patterns with different options, the longer
# patterns should be first/left (e.g. for 'MM', `1[0-2]` before `[1-9]`).
# This ensures that the longest match is done rather than the shortest.
# To have a consistent ordering, we always put the pattern that matches
# the larger number first (even if the patterns would otherwise be the
# same size).
2020-09-06 20:20:36 +00:00
# Regular expression fragment for each supported pattern part.
#
# Within an alternation, the alternative matching the larger number is
# listed first so that the regex engine prefers the longest match
# (e.g. "12" is matched as month 12, not as month 1 followed by "2").
PART_PATTERNS = {
    # Based on calver.org
    'YYYY': r"[1-9][0-9]{3}",
    'YY'  : r"[1-9][0-9]?",
    '0Y'  : r"[0-9]{2}",
    'GGGG': r"[1-9][0-9]{3}",
    'GG'  : r"[1-9][0-9]?",
    '0G'  : r"[0-9]{2}",
    'Q'   : r"[1-4]",
    'MM'  : r"(?:1[0-2]|[1-9])",
    '0M'  : r"(?:1[0-2]|0[1-9])",
    'DD'  : r"(?:3[0-1]|[1-2][0-9]|[1-9])",
    '0D'  : r"(?:3[0-1]|[1-2][0-9]|0[1-9])",
    'JJJ' : r"(?:36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|[1-9][0-9]|[1-9])",
    '00J' : r"(?:36[0-6]|3[0-5][0-9]|[1-2][0-9][0-9]|0[1-9][0-9]|00[1-9])",
    # week numbering parts
    #
    # WW/UU accept week 0, i.e. they follow the strftime %W/%U convention
    # where days before the first Monday/Sunday of the year are in week 0.
    # With that convention the last days of some years fall in week 53, so
    # the valid range is 0..53 (not 0..52).
    # NOTE(review): assumes WW/UU values are produced via %W/%U - confirm
    # against the code that computes week_w/week_u.
    'WW': r"(?:5[0-3]|[1-4][0-9]|[0-9])",
    '0W': r"(?:5[0-3]|[0-4][0-9])",
    'UU': r"(?:5[0-3]|[1-4][0-9]|[0-9])",
    '0U': r"(?:5[0-3]|[0-4][0-9])",
    # VV/0V start at week 1 and run up to week 53.
    'VV': r"(?:5[0-3]|[1-4][0-9]|[1-9])",
    '0V': r"(?:5[0-3]|[1-4][0-9]|0[1-9])",
    # non calver parts
    'MAJOR': r"[0-9]+",
    'MINOR': r"[0-9]+",
    'PATCH': r"[0-9]+",
    'MICRO': r"[0-9]+",
    'BUILD': r"[0-9]+",
    'TAG'  : r"(?:alpha|beta|dev|pre|rc|post|final)",
    'PYTAG': r"(?:a|b|dev|rc|post)",
    'NUM'  : r"[0-9]+",
}
# Maps each pattern part to the name of the regex capture group (field)
# that holds its value. Several parts may share one field, e.g. the padded
# and unpadded spellings of the same value.
PATTERN_PART_FIELDS = {
    # years
    "YYYY": "year_y",
    "YY": "year_y",
    "0Y": "year_y",
    "GGGG": "year_g",
    "GG": "year_g",
    "0G": "year_g",
    # quarter, month, day of month, day of year
    "Q": "quarter",
    "MM": "month",
    "0M": "month",
    "DD": "dom",
    "0D": "dom",
    "JJJ": "doy",
    "00J": "doy",
    # non calver parts (PATCH and MICRO share the "patch" field)
    "MAJOR": "major",
    "MINOR": "minor",
    "PATCH": "patch",
    "MICRO": "patch",
    "BUILD": "bid",
    "TAG": "tag",
    "PYTAG": "pytag",
    "NUM": "num",
    # week numbering parts
    "WW": "week_w",
    "0W": "week_w",
    "UU": "week_u",
    "0U": "week_u",
    "VV": "week_v",
    "0V": "week_v",
}
2020-09-17 23:45:25 +00:00
# A field value may be given either as text or as a number.
FieldValue = typ.Union[str, int]


def _fmt_num(val: FieldValue) -> str:
    """Return ``val`` converted with ``str`` (no padding, no truncation)."""
    return str(val)


def _fmt_yy(year_y: FieldValue) -> str:
    """Return the last two digits of ``year_y`` without a leading zero."""
    short_year = int(str(year_y)[-2:])
    return str(short_year)


def _fmt_0y(year_y: FieldValue) -> str:
    """Return the last two digits of ``year_y``, zero padded to width 2."""
    short_year = int(str(year_y)[-2:])
    return f"{short_year:02}"


def _fmt_gg(year_g: FieldValue) -> str:
    """Return the last two digits of ``year_g`` without a leading zero."""
    short_year = int(str(year_g)[-2:])
    return str(short_year)


def _fmt_0g(year_g: FieldValue) -> str:
    """Return the last two digits of ``year_g``, zero padded to width 2."""
    short_year = int(str(year_g)[-2:])
    return f"{short_year:02}"


def _fmt_0m(month: FieldValue) -> str:
    """Return ``month`` as an integer, zero padded to width 2."""
    return f"{int(month):02}"


def _fmt_0d(dom: FieldValue) -> str:
    """Return ``dom`` as an integer, zero padded to width 2."""
    return f"{int(dom):02}"


def _fmt_00j(doy: FieldValue) -> str:
    """Return ``doy`` as an integer, zero padded to width 3."""
    return f"{int(doy):03}"


def _fmt_0w(week_w: FieldValue) -> str:
    """Return ``week_w`` as an integer, zero padded to width 2."""
    return f"{int(week_w):02}"


def _fmt_0u(week_u: FieldValue) -> str:
    """Return ``week_u`` as an integer, zero padded to width 2."""
    return f"{int(week_u):02}"


def _fmt_0v(week_v: FieldValue) -> str:
    """Return ``week_v`` as an integer, zero padded to width 2."""
    return f"{int(week_v):02}"


# Maps each pattern part to the function that renders a field value as the
# text for that part. Unpadded parts use the plain ``str`` conversion; the
# zero prefixed parts use their padding formatter.
PART_FORMATS: typ.Dict[str, typ.Callable[[FieldValue], str]] = {
    # years
    "YYYY": _fmt_num,
    "YY": _fmt_yy,
    "0Y": _fmt_0y,
    "GGGG": _fmt_num,
    "GG": _fmt_gg,
    "0G": _fmt_0g,
    # quarter, month, day of month, day of year
    "Q": _fmt_num,
    "MM": _fmt_num,
    "0M": _fmt_0m,
    "DD": _fmt_num,
    "0D": _fmt_0d,
    "JJJ": _fmt_num,
    "00J": _fmt_00j,
    # non calver parts
    "MAJOR": _fmt_num,
    "MINOR": _fmt_num,
    "PATCH": _fmt_num,
    "MICRO": _fmt_num,
    "BUILD": _fmt_num,
    "TAG": _fmt_num,
    "PYTAG": _fmt_num,
    "NUM": _fmt_num,
    # week numbering parts
    "WW": _fmt_num,
    "0W": _fmt_0w,
    "UU": _fmt_num,
    "0U": _fmt_0u,
    "VV": _fmt_num,
    "0V": _fmt_0v,
}
def _replace_pattern_parts(pattern: str) -> str:
    """Turn an escaped version pattern into a regular expression string.

    The pattern is expected to be escaped already, so that everything
    besides the pattern parts and the ``[optional]`` brackets is treated
    literally.

    - If the pattern contains both ``[`` and ``]``, every ``[`` becomes
      ``(?:`` and every ``]`` becomes ``)?`` (an optional, non-capturing
      group).
    - For each part name in ``PART_PATTERNS``, its first occurrence in the
      pattern is replaced by a named group ``(?P<field>...)``, where the
      field name comes from ``PATTERN_PART_FIELDS``.
    - The result is wrapped in a ``(?P<version>...)`` group.
    """
    if "[" in pattern and "]" in pattern:
        pattern = pattern.replace("[", "(?:").replace("]", ")?")

    # Candidate substitutions, keyed so that ascending key order means:
    #   - right before left (larger end index first)
    #   - longer before shorter (for the same end index)
    candidates: typ.Dict[typ.Tuple[int, int], typ.Tuple[int, int, str]] = {}
    for name, regex in PART_PATTERNS.items():
        begin = pattern.find(name)
        if begin >= 0:
            group = f"(?P<{PATTERN_PART_FIELDS[name]}>{regex})"
            end = begin + len(name)
            candidates[(-end, -len(name))] = (begin, end, group)

    # Substituting from right to left keeps the indices of the candidates
    # further left valid, since only text to their right has changed.
    substituted = pattern
    boundary = len(pattern) + 1
    for key in sorted(candidates):
        begin, end, group = candidates[key]
        if end > boundary:
            # Overlaps a part that was already substituted (e.g. the "YY"
            # inside an already handled "YYYY"), so leave it alone.
            continue
        substituted = substituted[:begin] + group + substituted[end:]
        boundary = begin

    return "(?P<version>" + substituted + ")"
2020-09-06 20:20:36 +00:00
def compile_pattern_str(pattern: str) -> str:
    """Return the regular expression source for a version ``pattern``.

    Each (character, replacement) pair of ``PATTERN_ESCAPES`` is applied in
    order to escape the literal text of the pattern. The escaped text is
    then handed to ``_replace_pattern_parts``, which substitutes the pattern
    parts.
    """
    escaped_pattern = pattern
    for literal, replacement in PATTERN_ESCAPES:
        escaped_pattern = escaped_pattern.replace(literal, replacement)
    return _replace_pattern_parts(escaped_pattern)
2020-09-08 20:59:52 +00:00
def compile_pattern(pattern: str) -> v1patterns.Pattern:
    """Compile a version ``pattern`` into a ``v1patterns.Pattern``.

    The returned object is constructed from the original pattern string and
    the compiled regular expression derived from it.

    Raises whatever ``re.compile`` raises if the derived expression is not a
    valid regular expression.
    """
    regexp = re.compile(compile_pattern_str(pattern))
    return v1patterns.Pattern(pattern, regexp)