feat: all regex patterns have a capture group

This commit is contained in:
graelo 2021-03-27 15:36:48 +01:00
parent 8924af3c51
commit eaeaf5268b
3 changed files with 20 additions and 17 deletions

View file

@ -579,7 +579,7 @@ mod tests {
let lines = buffer.split('\n').collect::<Vec<_>>();
let use_all_patterns = true;
let named_pat = vec![];
let custom: Vec<String> = ["CUSTOM-[0-9]{4,}", "ISSUE-[0-9]{3}"]
let custom: Vec<String> = ["(CUSTOM-[0-9]{4,})", "(ISSUE-[0-9]{3})"]
.iter()
.map(|&s| s.to_string())
.collect();

View file

@ -133,17 +133,16 @@ fn find_raw_spans<'a>(
if *pat_name != "ansi_colors" {
let text = reg_match.as_str();
// In case the pattern has a capturing group, try obtaining
// that text and start offset, else use the entire match.
let (subtext, substart) = match reg
// All patterns must have a capturing group: try obtaining
// that text and start offset.
let capture = reg
.captures_iter(text)
.next()
.expect("This regex is guaranteed to match.")
.get(1)
{
Some(capture) => (capture.as_str(), capture.start()),
None => (text, 0),
};
.expect("This regex should have a capture group.");
let (subtext, substart) = (capture.as_str(), capture.start());
raw_spans.push(RawSpan {
x: offset + reg_match.start() as i32 + substart as i32,

View file

@ -1,3 +1,7 @@
//! This module defines the regex patterns.
//!
//! Every pattern must contain at least one capture group; only the first group is used.
use crate::error;
pub(super) const EXCLUDE_PATTERNS: [(&str, &str); 1] =
@ -13,30 +17,30 @@ pub(super) const PATTERNS: [(&str, &str); 17] = [
"url",
r"((https?://|git@|git://|ssh://|ftp://|file:///)[^ \(\)\[\]\{\}]+)",
),
("email", r"\b[A-z0-9._%+-]+@[A-z0-9.-]+\.[A-z]{2,}\b"),
("email", r"\b([A-z0-9._%+-]+@[A-z0-9.-]+\.[A-z]{2,})\b"),
("diff-a", r"--- a/([^ ]+)"),
("diff-b", r"\+\+\+ b/([^ ]+)"),
("docker", r"sha256:([0-9a-f]{64})"),
("path", r"(([.\w\-@~]+)?(/[.\w\-@]+)+)"),
("hexcolor", r"#[0-9a-fA-F]{6}"),
("hexcolor", r"(#[0-9a-fA-F]{6})"),
(
"uuid",
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
r"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})",
),
(
"version",
r"(v?\d{1,4}\.\d{1,4}(\.\d{1,4})?(-(alpha|beta|rc)(\.\d)?)?)[^.0-9s]",
),
("ipfs", r"Qm[0-9a-zA-Z]{44}"),
("sha", r"[0-9a-f]{7,40}"),
("ipv4", r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"),
("ipv6", r"[A-f0-9:]+:+[A-f0-9:]+[%\w\d]+"),
("pointer-address", r"0x[0-9a-fA-F]+"),
("ipfs", r"(Qm[0-9a-zA-Z]{44})"),
("sha", r"([0-9a-f]{7,40})"),
("ipv4", r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"),
("ipv6", r"([A-f0-9:]+:+[A-f0-9:]+[%\w\d]+)"),
("pointer-address", r"(0x[0-9a-fA-F]+)"),
(
"datetime",
r"(\d{4}-?\d{2}-?\d{2}([ T]\d{2}:\d{2}:\d{2}(\.\d{3,9})?)?)",
),
("digits", r"[0-9]{4,}"),
("digits", r"([0-9]{4,})"),
];
/// Type-safe string Pattern Name (newtype).