feat: search for specific pattern names

This commit is contained in:
graelo 2020-06-01 07:30:00 +02:00
parent be698ab741
commit fd76ea1491
5 changed files with 174 additions and 120 deletions

View file

@@ -4,7 +4,7 @@ use std::collections::HashMap;
use std::fmt;
use crate::alphabets::Alphabet;
use crate::regexes::{EXCLUDE_PATTERNS, PATTERNS};
use crate::regexes::{NamedPattern, EXCLUDE_PATTERNS, PATTERNS};
#[derive(Clone)]
pub struct Match<'a> {
@@ -52,6 +52,7 @@ impl<'a> fmt::Debug for RawMatch<'a> {
/// Borrowed inputs for one matching run: the text lines together with the
/// alphabet and the pattern configuration handed to `State::new`.
pub struct State<'a> {
/// Input text, one entry per line.
pub lines: &'a Vec<&'a str>,
// NOTE(review): presumably the source of the hint strings assigned to
// matches (tests built with `Alphabet("abcd")` expect the hint "a") — confirm.
alphabet: &'a Alphabet,
/// Named patterns to search for. When this list is empty the built-in
/// `PATTERNS` table is used; otherwise only the patterns listed here are.
named_patterns: &'a Vec<NamedPattern>,
/// Custom regular expressions, as strings. The matcher concatenates them
/// with the exclude patterns and the named/built-in patterns.
custom_regexes: &'a Vec<String>,
// NOTE(review): the effect of this flag is not visible in this excerpt;
// presumably it reverses the order in which hints are assigned — confirm.
pub reverse: bool,
}
@@ -60,12 +61,14 @@ impl<'a> State<'a> {
pub fn new(
lines: &'a Vec<&'a str>,
alphabet: &'a Alphabet,
named_patterns: &'a Vec<NamedPattern>,
custom_regexes: &'a Vec<String>,
reverse: bool,
) -> State<'a> {
State {
lines,
alphabet,
named_patterns,
custom_regexes,
reverse,
}
@@ -111,10 +114,17 @@ impl<'a> State<'a> {
})
.collect::<Vec<_>>();
let regexes = PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
.collect::<Vec<_>>();
let regexes = if self.named_patterns.is_empty() {
PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
} else {
self.named_patterns
.iter()
.map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
};
let all_regexes = [exclude_regexes, custom_regexes, regexes].concat();
@@ -257,9 +267,10 @@ mod tests {
#[test]
fn match_reverse() {
let lines = split("lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.first().unwrap().hint, "a");
@@ -269,9 +280,10 @@ mod tests {
#[test]
fn match_unique() {
let lines = split("lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(true);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(true);
assert_eq!(results.len(), 3);
assert_eq!(results.first().unwrap().hint, "a");
@@ -281,9 +293,10 @@ mod tests {
#[test]
fn match_docker() {
let lines = split("latest sha256:30557a29d5abc51e5f1d5b472e79b7e296f595abcf19fe6b9199dbbc809c6ff4 20 hours ago");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
assert_eq!(
@@ -295,9 +308,10 @@ mod tests {
#[test]
fn match_ansi_colors() {
let lines = split("path: /var/log/nginx.log\npath: test/log/nginx-2.log:32folder/.nginx@4df2.log");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text, "/var/log/nginx.log");
@@ -308,37 +322,37 @@ mod tests {
#[test]
fn match_paths() {
let lines = split("Lorem /tmp/foo/bar_lol, lorem\n Lorem /var/log/boot-strap.log lorem ../log/kern.log lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text.clone(), "/tmp/foo/bar_lol");
assert_eq!(
results.get(1).unwrap().text.clone(),
"/var/log/boot-strap.log"
);
assert_eq!(results.get(2).unwrap().text.clone(), "../log/kern.log");
assert_eq!(results.get(0).unwrap().text, "/tmp/foo/bar_lol");
assert_eq!(results.get(1).unwrap().text, "/var/log/boot-strap.log");
assert_eq!(results.get(2).unwrap().text, "../log/kern.log");
}
#[test]
fn match_home() {
let lines = split("Lorem ~/.gnu/.config.txt, lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().text.clone(), "~/.gnu/.config.txt");
assert_eq!(results.get(0).unwrap().text, "~/.gnu/.config.txt");
}
#[test]
fn match_uids() {
fn match_uuids() {
let lines =
split("Lorem ipsum 123e4567-e89b-12d3-a456-426655440000 lorem\n Lorem lorem lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
}
@@ -346,16 +360,17 @@ mod tests {
#[test]
fn match_shas() {
let lines = split("Lorem fd70b5695 5246ddf f924213 lorem\n Lorem 973113963b491874ab2e372ee60d4b4cb75f717c lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text.clone(), "fd70b5695");
assert_eq!(results.get(1).unwrap().text.clone(), "5246ddf");
assert_eq!(results.get(2).unwrap().text.clone(), "f924213");
assert_eq!(results.get(0).unwrap().text, "fd70b5695");
assert_eq!(results.get(1).unwrap().text, "5246ddf");
assert_eq!(results.get(2).unwrap().text, "f924213");
assert_eq!(
results.get(3).unwrap().text.clone(),
results.get(3).unwrap().text,
"973113963b491874ab2e372ee60d4b4cb75f717c"
);
}
@@ -364,34 +379,33 @@ mod tests {
fn match_ips() {
let lines =
split("Lorem ipsum 127.0.0.1 lorem\n Lorem 255.255.10.255 lorem 127.0.0.1 lorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text.clone(), "127.0.0.1");
assert_eq!(results.get(1).unwrap().text.clone(), "255.255.10.255");
assert_eq!(results.get(2).unwrap().text.clone(), "127.0.0.1");
assert_eq!(results.get(0).unwrap().text, "127.0.0.1");
assert_eq!(results.get(1).unwrap().text, "255.255.10.255");
assert_eq!(results.get(2).unwrap().text, "127.0.0.1");
}
#[test]
fn match_ipv6s() {
let lines = split("Lorem ipsum fe80::2:202:fe4 lorem\n Lorem 2001:67c:670:202:7ba8:5e41:1591:d723 lorem fe80::2:1 lorem ipsum fe80:22:312:fe::1%eth0");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text.clone(), "fe80::2:202:fe4");
assert_eq!(results.get(0).unwrap().text, "fe80::2:202:fe4");
assert_eq!(
results.get(1).unwrap().text.clone(),
results.get(1).unwrap().text,
"2001:67c:670:202:7ba8:5e41:1591:d723"
);
assert_eq!(results.get(2).unwrap().text.clone(), "fe80::2:1");
assert_eq!(
results.get(3).unwrap().text.clone(),
"fe80:22:312:fe::1%eth0"
);
assert_eq!(results.get(2).unwrap().text, "fe80::2:1");
assert_eq!(results.get(3).unwrap().text, "fe80:22:312:fe::1%eth0");
}
#[test]
@@ -399,85 +413,81 @@ mod tests {
let lines = split(
"Lorem ipsum [link](https://github.io?foo=bar) ![](http://cdn.com/img.jpg) lorem",
);
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 2);
assert_eq!(results.get(0).unwrap().pattern.clone(), "markdown_url");
assert_eq!(
results.get(0).unwrap().text.clone(),
"https://github.io?foo=bar"
);
assert_eq!(results.get(1).unwrap().pattern.clone(), "markdown_url");
assert_eq!(
results.get(1).unwrap().text.clone(),
"http://cdn.com/img.jpg"
);
assert_eq!(results.get(0).unwrap().pattern, "markdown_url");
assert_eq!(results.get(0).unwrap().text, "https://github.io?foo=bar");
assert_eq!(results.get(1).unwrap().pattern, "markdown_url");
assert_eq!(results.get(1).unwrap().text, "http://cdn.com/img.jpg");
}
#[test]
fn match_urls() {
let lines = split("Lorem ipsum https://www.rust-lang.org/tools lorem\n Lorem ipsumhttps://crates.io lorem https://github.io?foo=bar lorem ssh://github.io");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 4);
assert_eq!(
results.get(0).unwrap().text.clone(),
results.get(0).unwrap().text,
"https://www.rust-lang.org/tools"
);
assert_eq!(results.get(0).unwrap().pattern.clone(), "url");
assert_eq!(results.get(1).unwrap().text.clone(), "https://crates.io");
assert_eq!(results.get(1).unwrap().pattern.clone(), "url");
assert_eq!(
results.get(2).unwrap().text.clone(),
"https://github.io?foo=bar"
);
assert_eq!(results.get(2).unwrap().pattern.clone(), "url");
assert_eq!(results.get(3).unwrap().text.clone(), "ssh://github.io");
assert_eq!(results.get(3).unwrap().pattern.clone(), "url");
assert_eq!(results.get(0).unwrap().pattern, "url");
assert_eq!(results.get(1).unwrap().text, "https://crates.io");
assert_eq!(results.get(1).unwrap().pattern, "url");
assert_eq!(results.get(2).unwrap().text, "https://github.io?foo=bar");
assert_eq!(results.get(2).unwrap().pattern, "url");
assert_eq!(results.get(3).unwrap().text, "ssh://github.io");
assert_eq!(results.get(3).unwrap().pattern, "url");
}
#[test]
fn match_addresses() {
let lines = split("Lorem 0xfd70b5695 0x5246ddf lorem\n Lorem 0x973113tlorem");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 3);
assert_eq!(results.get(0).unwrap().text.clone(), "0xfd70b5695");
assert_eq!(results.get(1).unwrap().text.clone(), "0x5246ddf");
assert_eq!(results.get(2).unwrap().text.clone(), "0x973113");
assert_eq!(results.get(0).unwrap().text, "0xfd70b5695");
assert_eq!(results.get(1).unwrap().text, "0x5246ddf");
assert_eq!(results.get(2).unwrap().text, "0x973113");
}
#[test]
fn match_hex_colors() {
let lines =
split("Lorem #fd7b56 lorem #FF00FF\n Lorem #00fF05 lorem #abcd00 lorem #afRR00");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 4);
assert_eq!(results.get(0).unwrap().text.clone(), "#fd7b56");
assert_eq!(results.get(1).unwrap().text.clone(), "#FF00FF");
assert_eq!(results.get(2).unwrap().text.clone(), "#00fF05");
assert_eq!(results.get(3).unwrap().text.clone(), "#abcd00");
assert_eq!(results.get(0).unwrap().text, "#fd7b56");
assert_eq!(results.get(1).unwrap().text, "#FF00FF");
assert_eq!(results.get(2).unwrap().text, "#00fF05");
assert_eq!(results.get(3).unwrap().text, "#abcd00");
}
#[test]
fn match_ipfs() {
let lines = split("Lorem QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ lorem Qmfoobar");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
assert_eq!(
results.get(0).unwrap().text.clone(),
results.get(0).unwrap().text,
"QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ"
);
}
@@ -486,9 +496,10 @@ mod tests {
fn match_process_port() {
let lines =
split("Lorem 5695 52463 lorem\n Lorem 973113 lorem 99999 lorem 8888 lorem\n 23456 lorem 5432 lorem 23444");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 8);
}
@@ -496,53 +507,72 @@ mod tests {
#[test]
fn match_diff_a() {
let lines = split("Lorem lorem\n--- a/src/main.rs");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().text.clone(), "src/main.rs");
assert_eq!(results.get(0).unwrap().text, "src/main.rs");
}
#[test]
fn match_diff_b() {
let lines = split("Lorem lorem\n+++ b/src/main.rs");
let custom = [].to_vec();
let named_pat = vec![];
let custom = vec![];
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 1);
assert_eq!(results.get(0).unwrap().text.clone(), "src/main.rs");
assert_eq!(results.get(0).unwrap().text, "src/main.rs");
}
#[test]
fn priority() {
let lines = split("Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem");
let named_pat = vec![];
let custom: Vec<String> = ["CUSTOM-[0-9]{4,}", "ISSUE-[0-9]{3}"]
.iter()
.map(|&s| s.to_string())
.collect();
let alphabet = Alphabet("abcd".to_string());
let results = State::new(&lines, &alphabet, &custom, false).matches(false);
let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);
assert_eq!(results.len(), 9);
assert_eq!(results.get(0).unwrap().text.clone(), "http://foo.bar");
assert_eq!(results.get(1).unwrap().text.clone(), "CUSTOM-52463");
assert_eq!(results.get(2).unwrap().text.clone(), "ISSUE-123");
assert_eq!(results.get(0).unwrap().text, "http://foo.bar");
assert_eq!(results.get(1).unwrap().text, "CUSTOM-52463");
assert_eq!(results.get(2).unwrap().text, "ISSUE-123");
assert_eq!(results.get(3).unwrap().text, "/var/fd70b569/9999.log");
assert_eq!(results.get(4).unwrap().text, "52463");
assert_eq!(results.get(5).unwrap().text, "973113");
assert_eq!(
results.get(3).unwrap().text.clone(),
"/var/fd70b569/9999.log"
);
assert_eq!(results.get(4).unwrap().text.clone(), "52463");
assert_eq!(results.get(5).unwrap().text.clone(), "973113");
assert_eq!(
results.get(6).unwrap().text.clone(),
results.get(6).unwrap().text,
"123e4567-e89b-12d3-a456-426655440000"
);
assert_eq!(results.get(7).unwrap().text.clone(), "8888");
assert_eq!(results.get(7).unwrap().text, "8888");
assert_eq!(
results.get(8).unwrap().text.clone(),
results.get(8).unwrap().text,
"https://crates.io/23456/fd70b569"
);
}
#[test]
fn named_patterns() {
    use crate::regexes::parse_pattern_name;

    // The input mixes a markdown link, custom-looking tokens, a path, plain
    // numbers, a UUID and a bare URL. Only the pattern named `url` is
    // requested, so exactly the two URLs are expected back.
    let text = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem";
    let lines = split(text);
    let alphabet = Alphabet("abcd".to_string());
    let custom = vec![];
    let named_pat = vec![parse_pattern_name("url").unwrap()];

    let results = State::new(&lines, &alphabet, &named_pat, &custom, false).matches(false);

    assert_eq!(results.len(), 2);

    let first = results.get(0).unwrap();
    let second = results.get(1).unwrap();
    assert_eq!(first.text, "http://foo.bar");
    assert_eq!(second.text, "https://crates.io/23456/fd70b569");
}