bumpver/src/pycalver/parse.py

# This file is part of the pycalver project
# https://github.com/mbarkhau/pycalver
#
# (C) 2018 Manuel Barkhau (@mbarkhau)
# SPDX-License-Identifier: MIT

import re
import logging
import typing as typ
import pkg_resources


log = logging.getLogger("pycalver.parse")


VALID_RELESE_VALUES = ("alpha", "beta", "dev", "rc", "post")


# https://regex101.com/r/fnj60p/10
PYCALVER_RE: typ.Pattern[str] = re.compile(r"""
\b
(?P<version>
    (?P<calver>
       v                        # "v" version prefix
       (?P<year>\d{4})
       (?P<month>\d{2})
    )
    (?P<build>
        \.                      # "." build nr prefix
        \d{4,}
    )
    (?P<release>
        \-                      # "-" release prefix
        (?:alpha|beta|dev|rc|post)
    )?
)(?:\s|$)
""", flags=re.VERBOSE)


# NOTE (mb 2018-09-03): These are matchers for parts, which are
#   used in the patterns, they're not for validation. This means
#   that they may find strings, which are not valid pycalver
#   strings, when parsed in their full context. For such cases,
#   the patterns should be expanded.


RE_PATTERN_PARTS = {
    "pep440_version" : r"\d{6}\.[1-9]\d*(a|b|dev|rc|post)?\d*",
    "version"        : r"v\d{6}\.\d{4,}(\-(alpha|beta|dev|rc|post))?",
    "calver"         : r"v\d{6}",
    "build"          : r"\.\d{4,}",
    "release"        : r"(\-(alpha|beta|dev|rc|post))?",
}


class PatternMatch(typ.NamedTuple):

    lineno  : int                   # zero based
    line    : str
    pattern : str
    span    : typ.Tuple[int, int]
    match   : str


class VersionInfo(typ.NamedTuple):

    pep440_version : str
    version        : str
    calver         : str
    year           : str
    month          : str
    build          : str
    release        : typ.Optional[str]


def parse_version_info(version: str) -> VersionInfo:
    match = PYCALVER_RE.match(version)
    if match is None:
        raise ValueError(f"Invalid pycalver: {version}")
    pep440_version = str(pkg_resources.parse_version(version))
    return VersionInfo(pep440_version=pep440_version, **match.groupdict())


def iter_pattern_matches(lines: typ.List[str], pattern: str) -> typ.Iterable[PatternMatch]:
    # The pattern is escaped, so that everything besides the format
    # string variables is treated literally.
    pattern_re = re.compile(
        pattern
        .replace("\\", "\\\\")
        .replace("-", "\\-")
        .replace(".", "\\.")
        .replace("+", "\\+")
        .replace("*", "\\*")
        .replace("[", "\\[")
        .replace("(", "\\(")
        .format(**RE_PATTERN_PARTS)
    )
    for lineno, line in enumerate(lines):
        match = pattern_re.search(line)
        if match:
            yield PatternMatch(lineno, line, pattern, match.span(), match.group(0))


def parse_patterns(lines: typ.List[str], patterns: typ.List[str]) -> typ.List[PatternMatch]:
    all_matches: typ.List[PatternMatch] = []
    for pattern in patterns:
        all_matches.extend(iter_pattern_matches(lines, pattern))
    return all_matches