add pretty printing for regex patterns

This commit is contained in:
Manuel Barkhau 2020-10-03 16:44:30 +00:00
parent e10f858c40
commit e2b274a7bf
7 changed files with 278 additions and 44 deletions

View file

@ -9,6 +9,7 @@ __main__ module for PyCalVer.
Enables use as module: $ python -m pycalver --version
"""
import io
import sys
import typing as typ
import logging
@ -16,6 +17,7 @@ import datetime as dt
import subprocess as sp
import click
import colorama
from . import vcs
from . import v1cli
@ -23,6 +25,8 @@ from . import v2cli
from . import config
from . import rewrite
from . import version
from . import patterns
from . import regexfmt
from . import v1rewrite
from . import v1version
from . import v2rewrite
@ -149,7 +153,7 @@ def test(
) -> None:
"""Increment a version number for demo purposes."""
_configure_logging(verbose=max(_VERBOSE, verbose))
raw_pattern = pattern
raw_pattern = pattern # use internal naming convention
_validate_release_tag(release)
_date = _validate_date(date, pin_date)
@ -491,5 +495,108 @@ def bump(
_try_bump(cfg, new_version, commit_message, allow_dirty)
def _grep_text(pattern: patterns.Pattern, text: str, color: bool) -> int:
    """Print every match of pattern in text together with its context.

    For each match, the preceding line, the line containing the match and
    the following line are printed, each prefixed with its 1-based line
    number. An empty line is printed after each match.

    Args:
        pattern: Pattern object; only its ``regexp`` attribute is used.
        text: The complete text to search.
        color: If true, the matched text is wrapped in
            ``colorama.Style.BRIGHT`` / ``colorama.Style.RESET_ALL``.

    Returns:
        The number of matches found in text.
    """
    all_lines = text.splitlines()
    match_count = 0

    for match in pattern.regexp.finditer(text):
        match_count += 1
        match_start, match_end = match.span()

        line_idx = text.count("\n", 0, match_start)
        line_start = text.rfind("\n", 0, match_start) + 1
        line_end = text.find("\n", match_end)
        if line_end < 0:
            # The match is on the last line and the text has no trailing
            # newline: the line extends to the end of the text. (Slicing
            # with -1 here would silently drop the last character.)
            line_end = len(text)

        matched_text = text[match_start:match_end]
        if color:
            matched_text = colorama.Style.BRIGHT + matched_text + colorama.Style.RESET_ALL
        matched_line = text[line_start:match_start] + matched_text + text[match_end:line_end]

        # Clamp the start of the context window so that a match on the
        # first line does not produce a negative (wrapping) slice index.
        first_idx = max(0, line_idx - 1)
        context = all_lines[first_idx:line_idx]
        context.append(matched_line)
        context.extend(all_lines[line_idx + 1 : line_idx + 2])

        for line_no, line in enumerate(context, start=first_idx + 1):
            print(f"{line_no:>4}: {line}")
        print()

    return match_count
def _grep(
    raw_pattern: str,
    file_ios: typ.Tuple[io.TextIOWrapper, ...],
    color: bool,
) -> None:
    """Search each file for raw_pattern and print a per-file summary.

    Args:
        raw_pattern: The pattern string, compiled once via
            ``v2patterns.compile_pattern``.
        file_ios: Any number of readable text files; each is read in full.
        color: Passed through to ``_grep_text``.

    If nothing matched in any file, or if the module level ``_VERBOSE`` is
    truthy, the regular expression and a regex101 URL for the pattern are
    printed. The process exits with status 1 if there were no matches.
    """
    pattern = v2patterns.compile_pattern(raw_pattern)

    match_count = 0
    for file_io in file_ios:
        text = file_io.read()

        _match_count = _grep_text(pattern, text, color)

        print()
        print(f"Found {_match_count} match for pattern '{raw_pattern}' in {file_io.name}")
        print()

        match_count += _match_count

    if match_count == 0 or _VERBOSE:
        # Show the regular expression behind the pattern.
        pyexpr_regex = regexfmt.pyexpr_regex(pattern.regexp.pattern)

        print(f"# pycalver pattern: '{raw_pattern}'")
        print("# " + regexfmt.regex101_url(pattern))
        print(pyexpr_regex)
        print()

    if match_count == 0:
        sys.exit(1)
@cli.command()
@click.option(
    "-v",
    "--verbose",
    count=True,
    help="Control log level. -vv for debug level.",
)
@click.argument("pattern")
@click.argument('files', nargs=-1, type=click.File('r'))
def grep(
    pattern: str,
    files: typ.Tuple[io.TextIOWrapper, ...],
    verbose: int = 0,
) -> None:
    """Search files for a version pattern."""
    # The effective verbosity is the larger of the module level default
    # and the count given on this sub-command.
    verbose = max(_VERBOSE, verbose)
    _configure_logging(verbose)

    raw_pattern = pattern  # use internal naming convention

    # Only emit color codes if stdout reports itself as a terminal; objects
    # without an isatty attribute are treated as not being a terminal.
    isatty = getattr(sys.stdout, 'isatty', lambda: False)

    if isatty():
        colorama.init()
        try:
            _grep(raw_pattern, files, color=True)
        finally:
            # Undo colorama.init() even if _grep raises or calls sys.exit.
            colorama.deinit()
    else:
        _grep(raw_pattern, files, color=False)
# Invoke the cli entry point when this module is executed directly.
if __name__ == '__main__':
    cli()

42
src/pycalver/pysix.py Normal file
View file

@ -0,0 +1,42 @@
import sys
import typing as typ
# Compare the numeric version tuple rather than the free-form sys.version
# string, whose lexicographic ordering is not a reliable version comparison.
PY2 = sys.version_info[0] < 3

try:
    from urllib.parse import quote as py3_stdlib_quote
except ImportError:
    from urllib import quote as py2_stdlib_quote  # type: ignore

# NOTE (mb 2016-05-23): quote in python2 expects bytes argument.


def quote(
    string: str,
    safe: str = "/",
    encoding: typ.Optional[str] = None,
    errors: typ.Optional[str] = None,
) -> str:
    """Percent-encode string using the stdlib quote of the running python.

    Args:
        string: The text to quote.
        safe: Characters that are passed through to the stdlib ``quote``
            as its ``safe`` argument.
        encoding: Text encoding to use; ``None`` selects ``"utf-8"``.
        errors: Encoding error handler; ``None`` selects ``"strict"``.

    Returns:
        The quoted text.

    Raises:
        TypeError: If string is not an instance of ``str``.
    """
    if not isinstance(string, str):
        errmsg = f"Expected str/unicode but got {type(string)}"  # type: ignore
        raise TypeError(errmsg)

    _encoding = "utf-8" if encoding is None else encoding
    _errors = "strict" if errors is None else errors

    if PY2:
        # The python2 implementation works on bytes, so encode the
        # arguments and decode the result.
        data = string.encode(_encoding)
        res = py2_stdlib_quote(data, safe=safe.encode(_encoding))
        return res.decode(_encoding, errors=_errors)
    else:
        return py3_stdlib_quote(string, safe=safe, encoding=_encoding, errors=_errors)

68
src/pycalver/regexfmt.py Normal file
View file

@ -0,0 +1,68 @@
import re
import textwrap
from . import pysix
from . import patterns
def format_regex(regex: str) -> str:
    r"""Format a regex pattern suitable for flags=re.VERBOSE.

    Literal spaces are rewritten as ``[ ]``, groups are placed on their own
    lines and each line is indented according to its nesting depth.

    >>> regex = r"\[CalVer v(?P<year_y>[1-9][0-9]{3})(?P<month>(?:1[0-2]|0[1-9]))"
    >>> print(format_regex(regex))
    \[CalVer[ ]v
    (?P<year_y>[1-9][0-9]{3})
    (?P<month>
     (?:1[0-2]|0[1-9])
    )

    Raises:
        re.error: If regex, or the formatted result, does not compile.
    """
    # An invalid input regex should fail early.
    re.compile(regex)

    # Insert line breaks: after each closing paren (and an optional "?"),
    # before each opening paren, and between two closing parens that
    # ended up at the start of a line.
    broken = regex.replace(" ", r"[ ]")
    broken = re.sub(r"([^\\])?\)(\?)?", "\\1)\\2\n", broken)
    broken = re.sub(r"([^\\])\(", "\\1\n(", broken)
    broken = re.sub(r"^\)\)", ")\n)", broken, flags=re.MULTILINE)

    depth = 0
    result_lines = []
    for raw_line in broken.splitlines():
        if not raw_line.strip():
            continue

        delta = raw_line.count("(") - raw_line.count(")")
        # A line that closes more groups than it opens is dedented itself;
        # otherwise the change in depth only applies to the lines after it.
        own_depth = depth + min(delta, 0)
        result_lines.append(" " * own_depth + raw_line)
        depth += delta

    formatted_regex = "\n".join(result_lines)

    # A bug in the formatting above should surface as an error here.
    re.compile(formatted_regex)
    return formatted_regex
def pyexpr_regex(regex: str) -> str:
    """Return python source text of a re.compile call for regex.

    The regex is rendered via format_regex as a multi-line raw string with
    flags=re.VERBOSE. If format_regex raises re.error, the result is a
    plain re.compile call on the repr of the unformatted regex.
    """
    try:
        formatted_regex = format_regex(regex)
    except re.error:
        return f"re.compile({regex!r})"

    indented_regex = textwrap.indent(formatted_regex.rstrip(), " ")
    return 're.compile(r"""\n' + indented_regex + '\n""", flags=re.VERBOSE)'
def regex101_url(pattern: patterns.Pattern) -> str:
    """Build a regex101.com URL for the regular expression of pattern.

    The regex is formatted with format_regex; if that raises re.error, the
    unformatted regex is used. The regex text is quoted with pysix.quote
    and passed as the ``regex`` query parameter.
    """
    raw_regex = pattern.regexp.pattern
    try:
        regex_text = format_regex(raw_regex)
    except re.error:
        regex_text = raw_regex

    query = "?flavor=python" + "&flags=gmx" + "&regex=" + pysix.quote(regex_text)
    return "https://regex101.com/" + query