feat: can use or ignore all patterns

This commit is contained in:
graelo 2020-06-04 09:45:58 +02:00
parent 0622ab7bf6
commit 11e9735397
4 changed files with 291 additions and 53 deletions

View file

@ -11,8 +11,9 @@ pub struct Model<'a> {
// buffer: &'a str,
pub lines: Vec<&'a str>,
alphabet: &'a Alphabet,
use_all_patterns: bool,
named_patterns: &'a Vec<NamedPattern>,
custom_regexes: &'a Vec<String>,
custom_patterns: &'a Vec<String>,
pub reverse: bool,
}
@ -20,8 +21,9 @@ impl<'a> Model<'a> {
pub fn new(
buffer: &'a str,
alphabet: &'a Alphabet,
use_all_patterns: bool,
named_patterns: &'a Vec<NamedPattern>,
custom_regexes: &'a Vec<String>,
custom_patterns: &'a Vec<String>,
reverse: bool,
) -> Model<'a> {
let lines = buffer.split('\n').collect();
@ -30,8 +32,9 @@ impl<'a> Model<'a> {
// buffer,
lines,
alphabet,
use_all_patterns,
named_patterns,
custom_regexes,
custom_patterns,
reverse,
}
}
@ -73,7 +76,7 @@ impl<'a> Model<'a> {
.collect::<Vec<_>>();
let custom_regexes = self
.custom_regexes
.custom_patterns
.iter()
.map(|pattern| {
(
@ -83,7 +86,7 @@ impl<'a> Model<'a> {
})
.collect::<Vec<_>>();
let regexes = if self.named_patterns.is_empty() {
let regexes = if self.use_all_patterns {
PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
@ -252,10 +255,20 @@ mod tests {
#[test]
fn match_reverse() {
let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.first().unwrap().hint, "a");
@ -265,10 +278,20 @@ mod tests {
#[test]
fn match_unique() {
let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(true);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(true);
assert_eq!(results.len(), 3);
assert_eq!(results.first().unwrap().hint, "a");
@ -278,10 +301,20 @@ mod tests {
#[test]
fn match_docker() {
let buffer = "latest sha256:30557a29d5abc51e5f1d5b472e79b7e296f595abcf19fe6b9199dbbc809c6ff4 20 hours ago";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
assert_eq!(
@ -293,10 +326,20 @@ mod tests {
#[test]
fn match_ansi_colors() {
let buffer = "path: /var/log/nginx.log\npath: test/log/nginx-2.log:32folder/.nginx@4df2.log";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = true;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text, "/var/log/nginx.log");
@ -307,10 +350,20 @@ mod tests {
#[test]
fn match_paths() {
let buffer = "Lorem /tmp/foo/bar_lol, lorem\n Lorem /var/log/boot-strap.log lorem ../log/kern.log lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text, "/tmp/foo/bar_lol");
@ -321,10 +374,20 @@ mod tests {
#[test]
fn match_home() {
let buffer = "Lorem ~/.gnu/.config.txt, lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().text, "~/.gnu/.config.txt");
@ -333,10 +396,20 @@ mod tests {
#[test]
fn match_uuids() {
let buffer = "Lorem ipsum 123e4567-e89b-12d3-a456-426655440000 lorem\n Lorem lorem lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
}
@ -344,10 +417,20 @@ mod tests {
#[test]
fn match_shas() {
let buffer = "Lorem fd70b5695 5246ddf f924213 lorem\n Lorem 973113963b491874ab2e372ee60d4b4cb75f717c lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text, "fd70b5695");
@ -362,10 +445,20 @@ mod tests {
#[test]
fn match_ipv4s() {
let buffer = "Lorem ipsum 127.0.0.1 lorem\n Lorem 255.255.10.255 lorem 127.0.0.1 lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().pattern, "ipv4");
@ -379,10 +472,20 @@ mod tests {
#[test]
fn match_ipv6s() {
let buffer = "Lorem ipsum fe80::2:202:fe4 lorem\n Lorem 2001:67c:670:202:7ba8:5e41:1591:d723 lorem fe80::2:1 lorem ipsum fe80:22:312:fe::1%eth0";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text, "fe80::2:202:fe4");
@ -398,10 +501,20 @@ mod tests {
fn match_markdown_urls() {
let buffer =
"Lorem ipsum [link](https://github.io?foo=bar) ![](http://cdn.com/img.jpg) lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 2);
assert_eq!(results.get(0).unwrap().pattern, "markdown-url");
@ -413,10 +526,20 @@ mod tests {
#[test]
fn match_urls() {
let buffer = "Lorem ipsum https://www.rust-lang.org/tools lorem\n Lorem ipsumhttps://crates.io lorem https://github.io?foo=bar lorem ssh://github.io";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 4);
assert_eq!(
@ -436,10 +559,20 @@ mod tests {
fn match_emails() {
let buffer =
"Lorem ipsum <first.last+social@example.com> john@server.department.company.com lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 2);
assert_eq!(results.get(0).unwrap().pattern, "email");
@ -457,10 +590,20 @@ mod tests {
#[test]
fn match_addresses() {
let buffer = "Lorem 0xfd70b5695 0x5246ddf lorem\n Lorem 0x973113tlorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().pattern, "mem-address");
@ -474,10 +617,20 @@ mod tests {
#[test]
fn match_hex_colors() {
let buffer = "Lorem #fd7b56 lorem #FF00FF\n Lorem #00fF05 lorem #abcd00 lorem #afRR00";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text, "#fd7b56");
@ -489,10 +642,20 @@ mod tests {
#[test]
fn match_ipfs() {
let buffer = "Lorem QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ lorem Qmfoobar";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
assert_eq!(
@ -504,10 +667,20 @@ mod tests {
#[test]
fn match_process_port() {
let buffer = "Lorem 5695 52463 lorem\n Lorem 973113 lorem 99999 lorem 8888 lorem\n 23456 lorem 5432 lorem 23444";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 8);
}
@ -515,10 +688,20 @@ mod tests {
#[test]
fn match_diff_a() {
let buffer = "Lorem lorem\n--- a/src/main.rs";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().pattern, "diff-a");
@ -528,10 +711,20 @@ mod tests {
#[test]
fn match_diff_b() {
let buffer = "Lorem lorem\n+++ b/src/main.rs";
let use_all_patterns = true;
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().pattern, "diff-b");
@ -539,16 +732,25 @@ mod tests {
}
#[test]
fn priority() {
fn priority_between_regexes() {
let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem";
let use_all_patterns = true;
let named_pat = vec![];
let custom: Vec<String> = ["CUSTOM-[0-9]{4,}", "ISSUE-[0-9]{3}"]
.iter()
.map(|&s| s.to_string())
.collect();
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 9);
assert_eq!(results.get(0).unwrap().text, "http://foo.bar");
@ -572,12 +774,22 @@ mod tests {
fn named_patterns() {
let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem";
let use_all_patterns = false;
use crate::regexes::parse_pattern_name;
let named_pat = vec![parse_pattern_name("url").unwrap()];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);
let reverse = false;
let results = Model::new(
buffer,
&alphabet,
use_all_patterns,
&named_pat,
&custom,
reverse,
)
.matches(false);
assert_eq!(results.len(), 2);
assert_eq!(results.get(0).unwrap().text, "http://foo.bar");