add pretty printing for regex patterns

2025-12-13 23:03:54 +01:00 · 2020-10-03 16:44:30 +00:00 · 2020-10-03 16:44:30 +00:00 · e2b274a7bf
commit e2b274a7bf
parent e10f858c40
7 changed files with 278 additions and 44 deletions
--- a/src/pycalver/main.py
+++ b/src/pycalver/main.py
@ -9,6 +9,7 @@ __main__ module for PyCalVer.

 Enables use as module: $ python -m pycalver --version
 """
+import io
 import sys
 import typing as typ
 import logging
@ -16,6 +17,7 @@ import datetime as dt
 import subprocess as sp

 import click
+import colorama

 from . import vcs
 from . import v1cli
@ -23,6 +25,8 @@ from . import v2cli
 from . import config
 from . import rewrite
 from . import version
+from . import patterns
+from . import regexfmt
 from . import v1rewrite
 from . import v1version
 from . import v2rewrite
@ -149,7 +153,7 @@ def test(
 ) -> None:
    """Increment a version number for demo purposes."""
    _configure_logging(verbose=max(_VERBOSE, verbose))
-    raw_pattern = pattern
+    raw_pattern = pattern  # use internal naming convention

    _validate_release_tag(release)
    _date = _validate_date(date, pin_date)
@ -491,5 +495,108 @@ def bump(
    _try_bump(cfg, new_version, commit_message, allow_dirty)


+def _grep_text(pattern: patterns.Pattern, text: str, color: bool) -> int:
+    """Print every match of pattern in text with surrounding context lines.
+
+    For each match the line before, the matching line and the line after
+    are printed, each prefixed with its 1-based line number. An empty line
+    is printed after each match.
+
+    Args:
+        pattern: Object whose ``regexp`` attribute is a compiled regex.
+        text: Full contents of the file being searched.
+        color: If True, the matched text is wrapped in colorama bright/reset
+            style codes.
+
+    Returns:
+        The number of matches found in text.
+    """
+    match_count = 0
+    all_lines   = text.splitlines()
+    for match in pattern.regexp.finditer(text):
+        match_count += 1
+        match_start, match_end = match.span()
+
+        line_idx   = text[:match_start].count("\n")
+        line_start = text.rfind("\n", 0, match_start) + 1
+        line_end   = text.find("\n", match_end)
+        if line_end < 0:
+            # The match is on the last line and the text has no trailing
+            # newline: the line extends to the end of the text.
+            line_end = len(text)
+
+        if color:
+            matched_line = (
+                text[line_start:match_start]
+                + colorama.Style.BRIGHT
+                + text[match_start:match_end]
+                + colorama.Style.RESET_ALL
+                + text[match_end:line_end]
+            )
+        else:
+            matched_line = text[line_start:line_end]
+
+        # Clamp the start of the context window. A negative slice start
+        # would wrap around to the end of the list when the match is on
+        # the first line.
+        first_idx    = max(0, line_idx - 1)
+        lines_offset = first_idx + 1
+        lines        = all_lines[first_idx : line_idx + 2]
+
+        matched_pos = line_idx - first_idx
+        if matched_pos < len(lines):
+            lines[matched_pos] = matched_line
+        else:
+            # e.g. an empty match after the final newline, for which
+            # splitlines() produced no corresponding entry.
+            lines.append(matched_line)
+
+        for i, line in enumerate(lines):
+            print(f"{lines_offset + i:>4}: {line}")
+
+        print()
+    return match_count
+
+
+def _grep(
+    raw_pattern: str,
+    file_ios   : typ.Tuple[io.TextIOWrapper, ...],
+    color      : bool,
+) -> None:
+    """Search each file for raw_pattern and print the matches.
+
+    After each file a summary line with the number of matches is printed.
+    If nothing matched in any file (or verbose output is enabled), the
+    compiled regular expression is printed together with a regex101 link
+    to help with debugging the pattern.
+
+    Args:
+        raw_pattern: The version pattern as given by the user.
+        file_ios: Any number of readable text files to search.
+        color: Passed through to _grep_text to highlight matches.
+
+    Exits the process with status 1 if no match was found in any file.
+    """
+    pattern = v2patterns.compile_pattern(raw_pattern)
+
+    match_count = 0
+    for file_io in file_ios:
+        text = file_io.read()
+
+        _match_count = _grep_text(pattern, text, color)
+
+        print()
+        print(f"Found {_match_count} match for pattern '{raw_pattern}' in {file_io.name}")
+        print()
+
+        match_count += _match_count
+
+    if match_count == 0 or _VERBOSE:
+        # Show the generated regex so the user can see why nothing matched.
+        pyexpr_regex = regexfmt.pyexpr_regex(pattern.regexp.pattern)
+
+        print(f"# pycalver pattern: '{raw_pattern}'")
+        print("# " + regexfmt.regex101_url(pattern))
+        print(pyexpr_regex)
+        print()
+
+    if match_count == 0:
+        # Non-zero exit status when nothing was found.
+        sys.exit(1)
+
+
+@cli.command()
+@click.option(
+    "-v",
+    "--verbose",
+    count=True,
+    help="Control log level. -vv for debug level.",
+)
+@click.argument("pattern")
+@click.argument('files', nargs=-1, type=click.File('r'))
+def grep(
+    pattern: str,
+    files  : typ.Tuple[io.TextIOWrapper, ...],
+    verbose: int = 0,
+) -> None:
+    """Search files for a version pattern."""
+    # NOTE: the docstring above is kept to a single line on purpose; click
+    #   uses it as the help text of the command.
+    verbose = max(_VERBOSE, verbose)
+    _configure_logging(verbose)
+
+    raw_pattern = pattern  # use internal naming convention
+
+    # Some stdout replacements have no isatty attribute; treat those as
+    # non-interactive.
+    isatty = getattr(sys.stdout, 'isatty', lambda: False)
+
+    if isatty():
+        # Only emit style codes when writing to a terminal. colorama is
+        # deinitialized again even if _grep exits via sys.exit.
+        colorama.init()
+        try:
+            _grep(raw_pattern, files, color=True)
+        finally:
+            colorama.deinit()
+    else:
+        _grep(raw_pattern, files, color=False)
+
+
 if __name__ == '__main__':
    cli()
--- a/src/pycalver/pysix.py
+++ b/src/pycalver/pysix.py
@ -0,0 +1,42 @@
+import sys
+import typing as typ
+
+# True when running on a Python 2 interpreter. Compare the numeric major
+# version rather than the free-form sys.version string.
+PY2 = sys.version_info[0] < 3
+
+
+# Only one of py3_stdlib_quote/py2_stdlib_quote is defined, depending on
+# which import succeeds.
+try:
+    from urllib.parse import quote as py3_stdlib_quote
+except ImportError:
+    from urllib import quote as py2_stdlib_quote  # type: ignore
+
+
+# NOTE (mb 2016-05-23): quote in python2 expects bytes argument.
+
+
+def quote(
+    string  : str,
+    safe    : str = "/",
+    encoding: typ.Optional[str] = None,
+    errors  : typ.Optional[str] = None,
+) -> str:
+    """Percent-encode string, with the same text-in/text-out API on Python 2 and 3.
+
+    Args:
+        string: The text to quote.
+        safe: Characters that are passed on to the stdlib quote function
+            as not to be quoted.
+        encoding: Text encoding to use; "utf-8" if None.
+        errors: Error handling scheme to use; "strict" if None.
+
+    Raises:
+        TypeError: If string is not an instance of str.
+    """
+    if not isinstance(string, str):
+        errmsg = f"Expected str/unicode but got {type(string)}"  # type: ignore
+        raise TypeError(errmsg)
+
+    _encoding = "utf-8" if encoding is None else encoding
+    _errors   = "strict" if errors is None else errors
+
+    if not PY2:
+        return py3_stdlib_quote(string, safe=safe, encoding=_encoding, errors=_errors)
+
+    # The Python 2 quote function works on bytes: encode the arguments and
+    # decode the result.
+    quoted = py2_stdlib_quote(string.encode(_encoding), safe=safe.encode(_encoding))
+    return quoted.decode(_encoding, errors=_errors)
--- a/src/pycalver/regexfmt.py
+++ b/src/pycalver/regexfmt.py
@ -0,0 +1,68 @@
+import re
+import textwrap
+
+from . import pysix
+from . import patterns
+
+
+def format_regex(regex: str) -> str:
+    r"""Format a regex pattern suitable for flags=re.VERBOSE.
+
+    Groups are put on separate lines and nested groups are indented by
+    four spaces per nesting level. Literal spaces are rewritten as ``[ ]``.
+
+    Raises re.error if regex (or the formatted result) does not compile.
+
+    >>> regex = r"\[CalVer v(?P<year_y>[1-9][0-9]{3})(?P<month>(?:1[0-2]|0[1-9]))"
+    >>> print(format_regex(regex))
+    \[CalVer[ ]v
+    (?P<year_y>[1-9][0-9]{3})
+    (?P<month>
+        (?:1[0-2]|0[1-9])
+    )
+    """
+    # provoke error for invalid regex
+    re.compile(regex)
+
+    # NOTE(review): every space is replaced, including any that is already
+    #   inside a character class - confirm patterns never contain those.
+    tmp_regex = regex.replace(" ", r"[ ]")
+    # line break after each closing paren (and an optional trailing "?")
+    tmp_regex, _ = re.subn(r"([^\\])?\)(\?)?", "\\1)\\2\n", tmp_regex)
+    # line break before each opening paren not preceded by a backslash
+    tmp_regex, _ = re.subn(r"([^\\])\("      , "\\1\n("   , tmp_regex)
+    # split a "))" at the start of a line onto two lines
+    tmp_regex, _ = re.subn(r"^\)\)"          , ")\n)"     , tmp_regex, flags=re.MULTILINE)
+    lines          = tmp_regex.splitlines()
+    indented_lines = []
+    level          = 0
+    for line in lines:
+        # blank lines produced by the substitutions above are dropped
+        if line.strip():
+            # net number of groups opened (positive) or closed (negative)
+            increment = line.count("(") - line.count(")")
+            if increment >= 0:
+                # opening lines are indented at the current level
+                line = "    " * level + line
+                level += increment
+            else:
+                # closing lines are dedented first, to align with the opener
+                level += increment
+                line = "    " * level + line
+            indented_lines.append(line)
+
+    formatted_regex = "\n".join(indented_lines)
+
+    # provoke error if there is a bug in the formatting code
+    re.compile(formatted_regex)
+    return formatted_regex
+
+
+def pyexpr_regex(regex: str) -> str:
+    """Render regex as the source of a Python re.compile(...) expression.
+
+    The pattern is formatted over multiple lines for use with
+    flags=re.VERBOSE. If formatting raises re.error, a single line
+    expression using the repr of regex is returned instead.
+    """
+    try:
+        body = textwrap.indent(format_regex(regex).rstrip(), "    ")
+    except re.error:
+        return f"re.compile({repr(regex)})"
+
+    return 're.compile(r"""\n' + body + '\n""", flags=re.VERBOSE)'
+
+
+def regex101_url(pattern: patterns.Pattern) -> str:
+    """Build a regex101.com URL with the regex of pattern pre-filled.
+
+    The multi-line formatted regex is used if it can be produced; if
+    formatting raises re.error, the unformatted regex is used instead.
+    """
+    try:
+        regex_text = format_regex(pattern.regexp.pattern)
+    except re.error:
+        regex_text = pattern.regexp.pattern
+
+    query = "?flavor=python" + "&flags=gmx" + "&regex=" + pysix.quote(regex_text)
+    return "https://regex101.com/" + query