2020-06-01 09:16:56 +02:00
|
|
|
|
use std::collections;
|
|
|
|
|
|
|
2020-06-02 20:03:16 +02:00
|
|
|
|
use regex::Regex;
|
2020-05-31 22:45:36 +02:00
|
|
|
|
use sequence_trie::SequenceTrie;
|
2020-06-02 20:03:16 +02:00
|
|
|
|
|
2020-05-30 19:28:54 +02:00
|
|
|
|
use crate::alphabets::Alphabet;
|
2020-06-01 07:30:00 +02:00
|
|
|
|
use crate::regexes::{NamedPattern, EXCLUDE_PATTERNS, PATTERNS};
|
2020-06-02 20:03:16 +02:00
|
|
|
|
|
2020-06-01 15:25:54 +02:00
|
|
|
|
/// Holds data for the `Ui`.
|
2020-06-01 09:33:40 +02:00
|
|
|
|
pub struct Model<'a> {
|
2020-06-01 20:12:02 +02:00
|
|
|
|
// buffer: &'a str,
|
|
|
|
|
|
pub lines: Vec<&'a str>,
|
2020-05-27 10:04:42 +02:00
|
|
|
|
alphabet: &'a Alphabet,
|
2020-06-01 07:30:00 +02:00
|
|
|
|
named_patterns: &'a Vec<NamedPattern>,
|
2020-05-30 19:28:54 +02:00
|
|
|
|
custom_regexes: &'a Vec<String>,
|
2020-05-30 22:23:33 +02:00
|
|
|
|
pub reverse: bool,
|
2020-06-02 20:03:16 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2020-06-01 09:33:40 +02:00
|
|
|
|
impl<'a> Model<'a> {
|
2020-05-27 10:04:42 +02:00
|
|
|
|
pub fn new(
|
2020-06-01 20:12:02 +02:00
|
|
|
|
buffer: &'a str,
|
2020-05-27 10:04:42 +02:00
|
|
|
|
alphabet: &'a Alphabet,
|
2020-06-01 07:30:00 +02:00
|
|
|
|
named_patterns: &'a Vec<NamedPattern>,
|
2020-05-30 19:28:54 +02:00
|
|
|
|
custom_regexes: &'a Vec<String>,
|
2020-05-30 22:23:33 +02:00
|
|
|
|
reverse: bool,
|
2020-06-01 09:33:40 +02:00
|
|
|
|
) -> Model<'a> {
|
2020-06-01 20:12:02 +02:00
|
|
|
|
let lines = buffer.split('\n').collect();
|
|
|
|
|
|
|
2020-06-01 09:33:40 +02:00
|
|
|
|
Model {
|
2020-06-01 20:12:02 +02:00
|
|
|
|
// buffer,
|
2020-05-27 10:04:42 +02:00
|
|
|
|
lines,
|
|
|
|
|
|
alphabet,
|
2020-06-01 07:30:00 +02:00
|
|
|
|
named_patterns,
|
2020-05-30 19:28:54 +02:00
|
|
|
|
custom_regexes,
|
2020-05-30 22:23:33 +02:00
|
|
|
|
reverse,
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
2020-06-02 20:03:16 +02:00
|
|
|
|
}
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
2020-05-31 22:45:36 +02:00
|
|
|
|
/// Returns a vector of `Match`es, each corresponding to a pattern match
|
|
|
|
|
|
/// in the lines, its location (x, y), and associated hint.
|
2020-05-30 22:23:33 +02:00
|
|
|
|
pub fn matches(&self, unique: bool) -> Vec<Match<'a>> {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
let mut raw_matches = self.raw_matches();
|
|
|
|
|
|
|
|
|
|
|
|
if self.reverse {
|
|
|
|
|
|
raw_matches.reverse();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let mut matches = self.associate_hints(&raw_matches, unique);
|
|
|
|
|
|
|
|
|
|
|
|
if self.reverse {
|
|
|
|
|
|
matches.reverse();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
matches
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2020-06-01 09:33:40 +02:00
|
|
|
|
/// Internal function that searches the model's lines for pattern matches.
|
2020-05-31 22:45:36 +02:00
|
|
|
|
/// Returns a vector of `RawMatch`es (text, location, pattern id) without
|
|
|
|
|
|
/// an associated hint. The hint is attached to `Match`, not to `RawMatch`.
|
2020-06-01 09:16:56 +02:00
|
|
|
|
///
|
|
|
|
|
|
/// # Notes
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Custom regexes have priority over other regexes.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// If no named patterns were specified, it will search for all available
|
|
|
|
|
|
/// patterns from the `PATTERNS` catalog.
|
2020-05-31 00:17:51 +02:00
|
|
|
|
fn raw_matches(&self) -> Vec<RawMatch<'a>> {
|
2020-05-27 10:04:42 +02:00
|
|
|
|
let mut matches = Vec::new();
|
|
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
let exclude_regexes = EXCLUDE_PATTERNS
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.iter()
|
2020-05-31 00:17:51 +02:00
|
|
|
|
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
let custom_regexes = self
|
2020-05-30 19:28:54 +02:00
|
|
|
|
.custom_regexes
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.iter()
|
2020-05-31 00:17:51 +02:00
|
|
|
|
.map(|pattern| {
|
|
|
|
|
|
(
|
|
|
|
|
|
"custom",
|
|
|
|
|
|
Regex::new(pattern).expect("Invalid custom regexp"),
|
|
|
|
|
|
)
|
|
|
|
|
|
})
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
|
2020-06-01 07:30:00 +02:00
|
|
|
|
let regexes = if self.named_patterns.is_empty() {
|
|
|
|
|
|
PATTERNS
|
|
|
|
|
|
.iter()
|
|
|
|
|
|
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
|
|
|
|
|
|
.collect::<Vec<(&str, regex::Regex)>>()
|
|
|
|
|
|
} else {
|
|
|
|
|
|
self.named_patterns
|
|
|
|
|
|
.iter()
|
|
|
|
|
|
.map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap()))
|
|
|
|
|
|
.collect::<Vec<(&str, regex::Regex)>>()
|
|
|
|
|
|
};
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
let all_regexes = [exclude_regexes, custom_regexes, regexes].concat();
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
|
|
|
|
|
for (index, line) in self.lines.iter().enumerate() {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
// Remainder of the line to be searched for matches.
|
|
|
|
|
|
// This advances iteratively, until no matches can be found.
|
2020-05-27 10:04:42 +02:00
|
|
|
|
let mut chunk: &str = line;
|
|
|
|
|
|
let mut offset: i32 = 0;
|
|
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
// Use all avail regexes to match the chunk and select the match
|
|
|
|
|
|
// occuring the earliest on the chunk. Save its matched text and
|
|
|
|
|
|
// position in a `Match` struct.
|
2020-05-27 10:04:42 +02:00
|
|
|
|
loop {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
let chunk_matches = all_regexes
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.iter()
|
2020-05-31 00:17:51 +02:00
|
|
|
|
.filter_map(|(&ref name, regex)| match regex.find_iter(chunk).nth(0) {
|
2020-06-01 09:16:56 +02:00
|
|
|
|
Some(m) => Some((name, regex, m)),
|
2020-05-27 10:04:42 +02:00
|
|
|
|
None => None,
|
|
|
|
|
|
})
|
|
|
|
|
|
.collect::<Vec<_>>();
|
2020-05-31 00:17:51 +02:00
|
|
|
|
|
|
|
|
|
|
if chunk_matches.is_empty() {
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2020-06-01 09:16:56 +02:00
|
|
|
|
// First match on the chunk.
|
|
|
|
|
|
let (name, pattern, matching) = chunk_matches
|
2020-05-27 10:04:42 +02:00
|
|
|
|
.iter()
|
2020-05-31 00:17:51 +02:00
|
|
|
|
.min_by(|x, y| x.2.start().cmp(&y.2.start()))
|
|
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
|
|
|
|
let text = matching.as_str();
|
|
|
|
|
|
|
|
|
|
|
|
let captures = pattern
|
|
|
|
|
|
.captures(text)
|
|
|
|
|
|
.expect("At this stage the regex must have matched.");
|
|
|
|
|
|
|
|
|
|
|
|
// Handle both capturing and non-capturing patterns.
|
|
|
|
|
|
let (subtext, substart) = if let Some(capture) = captures.get(1) {
|
|
|
|
|
|
(capture.as_str(), capture.start())
|
2020-05-27 10:04:42 +02:00
|
|
|
|
} else {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
(text, 0)
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Never hint or break ansi color sequences.
|
|
|
|
|
|
if *name != "ansi_colors" {
|
|
|
|
|
|
matches.push(RawMatch {
|
|
|
|
|
|
x: offset + matching.start() as i32 + substart as i32,
|
|
|
|
|
|
y: index as i32,
|
|
|
|
|
|
pattern: name,
|
|
|
|
|
|
text: subtext,
|
|
|
|
|
|
});
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
2020-05-31 00:17:51 +02:00
|
|
|
|
|
|
|
|
|
|
chunk = chunk.get(matching.end()..).expect("Unknown chunk");
|
|
|
|
|
|
offset += matching.end() as i32;
|
2020-06-02 20:03:16 +02:00
|
|
|
|
}
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
matches
|
|
|
|
|
|
}
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
/// Associate a hint to each `RawMatch`, returning a vector of `Match`es.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// If `unique` is `true`, all duplicate matches will have the same hint.
|
|
|
|
|
|
/// For copying matched text, this seems easier and more natural.
|
|
|
|
|
|
/// If `unique` is `false`, duplicate matches will have their own hint.
|
|
|
|
|
|
fn associate_hints(&self, raw_matches: &Vec<RawMatch<'a>>, unique: bool) -> Vec<Match<'a>> {
|
|
|
|
|
|
let hints = self.alphabet.make_hints(raw_matches.len());
|
|
|
|
|
|
let mut hints_iter = hints.iter();
|
|
|
|
|
|
|
|
|
|
|
|
let mut result: Vec<Match<'a>> = vec![];
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
|
|
|
|
|
if unique {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
// Map (text, hint)
|
2020-06-01 09:16:56 +02:00
|
|
|
|
let mut known: collections::HashMap<&str, &str> = collections::HashMap::new();
|
2020-05-31 00:17:51 +02:00
|
|
|
|
|
|
|
|
|
|
for raw_mat in raw_matches {
|
|
|
|
|
|
let hint: &str = known.entry(raw_mat.text).or_insert(
|
|
|
|
|
|
hints_iter
|
|
|
|
|
|
.next()
|
|
|
|
|
|
.expect("We should have as many hints as necessary, even invisible ones."),
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
result.push(Match {
|
|
|
|
|
|
x: raw_mat.x,
|
|
|
|
|
|
y: raw_mat.y,
|
|
|
|
|
|
pattern: raw_mat.pattern,
|
|
|
|
|
|
text: raw_mat.text,
|
|
|
|
|
|
hint: hint.to_string(),
|
|
|
|
|
|
});
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
2020-06-02 20:03:16 +02:00
|
|
|
|
} else {
|
2020-05-31 00:17:51 +02:00
|
|
|
|
for raw_mat in raw_matches {
|
|
|
|
|
|
let hint = hints_iter
|
|
|
|
|
|
.next()
|
|
|
|
|
|
.expect("We should have as many hints as necessary, even invisible ones.");
|
|
|
|
|
|
|
|
|
|
|
|
result.push(Match {
|
|
|
|
|
|
x: raw_mat.x,
|
|
|
|
|
|
y: raw_mat.y,
|
|
|
|
|
|
pattern: raw_mat.pattern,
|
|
|
|
|
|
text: raw_mat.text,
|
|
|
|
|
|
hint: hint.to_string(),
|
|
|
|
|
|
});
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
2020-06-02 20:03:16 +02:00
|
|
|
|
}
|
2020-05-27 10:04:42 +02:00
|
|
|
|
|
2020-05-31 00:17:51 +02:00
|
|
|
|
result
|
2020-06-02 20:03:16 +02:00
|
|
|
|
}
|
2020-05-31 22:45:36 +02:00
|
|
|
|
|
|
|
|
|
|
/// Builds a `SequenceTrie` that helps determine if a sequence of keys
|
|
|
|
|
|
/// entered by the user corresponds to a match. This kind of lookup
|
|
|
|
|
|
/// directly returns a reference to the corresponding `Match` if any.
|
|
|
|
|
|
pub fn build_lookup_trie(matches: &'a Vec<Match<'a>>) -> SequenceTrie<char, usize> {
|
|
|
|
|
|
let mut trie = SequenceTrie::new();
|
|
|
|
|
|
|
|
|
|
|
|
for (index, mat) in matches.iter().enumerate() {
|
|
|
|
|
|
let hint_chars = mat.hint.chars().collect::<Vec<char>>();
|
|
|
|
|
|
|
|
|
|
|
|
// no need to insert twice the same hint
|
|
|
|
|
|
if trie.get(&hint_chars).is_none() {
|
|
|
|
|
|
trie.insert_owned(hint_chars, index);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
trie
|
|
|
|
|
|
}
|
2020-05-27 10:04:42 +02:00
|
|
|
|
}
|
2020-06-02 20:03:16 +02:00
|
|
|
|
|
2020-06-01 17:43:51 +02:00
|
|
|
|
/// A piece of matched text together with its location, the name of the
/// pattern that produced it, and the hint assigned to it.
#[derive(Debug)]
pub struct Match<'a> {
    /// Horizontal position: offset of the text within its line.
    pub x: i32,
    /// Vertical position: index of the line holding the text.
    pub y: i32,
    /// Name of the pattern that produced this match.
    pub pattern: &'a str,
    /// The matched text.
    pub text: &'a str,
    /// Hint associated with this match.
    pub hint: String,
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Internal stand-in for `Match`, used before a hint has been associated.
#[derive(Debug)]
struct RawMatch<'a> {
    /// Horizontal position: offset of the text within its line.
    pub x: i32,
    /// Vertical position: index of the line holding the text.
    pub y: i32,
    /// Name of the pattern that produced this match.
    pub pattern: &'a str,
    /// The matched text.
    pub text: &'a str,
}
|
|
|
|
|
|
|
2020-05-27 10:04:42 +02:00
|
|
|
|
/// Unit tests for `Model`: each test feeds a small buffer to the model and
/// checks the matches found by the default pattern catalog, by custom
/// regexes, or by explicitly named patterns.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::alphabets::Alphabet;

    #[test]
    fn match_reverse() {
        let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());

        // Forward: hints are handed out from the first match on.
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.first().unwrap().hint, "a");
        assert_eq!(results.last().unwrap().hint, "c");

        // Reversed: hints are handed out from the last match on, while the
        // matches themselves stay in their original order.
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, true).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.first().unwrap().hint, "c");
        assert_eq!(results.last().unwrap().hint, "a");
    }

    #[test]
    fn match_unique() {
        let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(true);

        assert_eq!(results.len(), 3);
        assert_eq!(results.first().unwrap().hint, "a");
        assert_eq!(results.last().unwrap().hint, "a");
    }

    #[test]
    fn match_docker() {
        let buffer = "latest sha256:30557a29d5abc51e5f1d5b472e79b7e296f595abcf19fe6b9199dbbc809c6ff4 20 hours ago";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
        assert_eq!(
            results.get(0).unwrap().text,
            "30557a29d5abc51e5f1d5b472e79b7e296f595abcf19fe6b9199dbbc809c6ff4"
        );
    }

    #[test]
    fn match_ansi_colors() {
        // The escape bytes are spelled out so that they survive editors and
        // tooling that drop raw control characters.
        let buffer = "path: \x1b[32m/var/log/nginx.log\x1b[m\npath: \x1b[32mtest/log/nginx-2.log:32\x1b[mfolder/.nginx@4df2.log";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.get(0).unwrap().text, "/var/log/nginx.log");
        assert_eq!(results.get(1).unwrap().text, "test/log/nginx-2.log");
        assert_eq!(results.get(2).unwrap().text, "folder/.nginx@4df2.log");
    }

    #[test]
    fn match_paths() {
        let buffer = "Lorem /tmp/foo/bar_lol, lorem\n Lorem /var/log/boot-strap.log lorem ../log/kern.log lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.get(0).unwrap().text, "/tmp/foo/bar_lol");
        assert_eq!(results.get(1).unwrap().text, "/var/log/boot-strap.log");
        assert_eq!(results.get(2).unwrap().text, "../log/kern.log");
    }

    #[test]
    fn match_home() {
        let buffer = "Lorem ~/.gnu/.config.txt, lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
        assert_eq!(results.get(0).unwrap().text, "~/.gnu/.config.txt");
    }

    #[test]
    fn match_uuids() {
        let buffer = "Lorem ipsum 123e4567-e89b-12d3-a456-426655440000 lorem\n Lorem lorem lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
    }

    #[test]
    fn match_shas() {
        let buffer = "Lorem fd70b5695 5246ddf f924213 lorem\n Lorem 973113963b491874ab2e372ee60d4b4cb75f717c lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 4);
        assert_eq!(results.get(0).unwrap().text, "fd70b5695");
        assert_eq!(results.get(1).unwrap().text, "5246ddf");
        assert_eq!(results.get(2).unwrap().text, "f924213");
        assert_eq!(
            results.get(3).unwrap().text,
            "973113963b491874ab2e372ee60d4b4cb75f717c"
        );
    }

    #[test]
    fn match_ips() {
        let buffer = "Lorem ipsum 127.0.0.1 lorem\n Lorem 255.255.10.255 lorem 127.0.0.1 lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.get(0).unwrap().text, "127.0.0.1");
        assert_eq!(results.get(1).unwrap().text, "255.255.10.255");
        assert_eq!(results.get(2).unwrap().text, "127.0.0.1");
    }

    #[test]
    fn match_ipv6s() {
        let buffer = "Lorem ipsum fe80::2:202:fe4 lorem\n Lorem 2001:67c:670:202:7ba8:5e41:1591:d723 lorem fe80::2:1 lorem ipsum fe80:22:312:fe::1%eth0";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 4);
        assert_eq!(results.get(0).unwrap().text, "fe80::2:202:fe4");
        assert_eq!(
            results.get(1).unwrap().text,
            "2001:67c:670:202:7ba8:5e41:1591:d723"
        );
        assert_eq!(results.get(2).unwrap().text, "fe80::2:1");
        assert_eq!(results.get(3).unwrap().text, "fe80:22:312:fe::1%eth0");
    }

    #[test]
    fn match_markdown_urls() {
        // Two markdown links: a regular link and an image link.
        let buffer =
            "Lorem ipsum [link](https://github.io?foo=bar) ![](http://cdn.com/img.jpg) lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 2);
        assert_eq!(results.get(0).unwrap().pattern, "markdown_url");
        assert_eq!(results.get(0).unwrap().text, "https://github.io?foo=bar");
        assert_eq!(results.get(1).unwrap().pattern, "markdown_url");
        assert_eq!(results.get(1).unwrap().text, "http://cdn.com/img.jpg");
    }

    #[test]
    fn match_urls() {
        let buffer = "Lorem ipsum https://www.rust-lang.org/tools lorem\n Lorem ipsumhttps://crates.io lorem https://github.io?foo=bar lorem ssh://github.io";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 4);
        assert_eq!(
            results.get(0).unwrap().text,
            "https://www.rust-lang.org/tools"
        );
        assert_eq!(results.get(0).unwrap().pattern, "url");
        assert_eq!(results.get(1).unwrap().text, "https://crates.io");
        assert_eq!(results.get(1).unwrap().pattern, "url");
        assert_eq!(results.get(2).unwrap().text, "https://github.io?foo=bar");
        assert_eq!(results.get(2).unwrap().pattern, "url");
        assert_eq!(results.get(3).unwrap().text, "ssh://github.io");
        assert_eq!(results.get(3).unwrap().pattern, "url");
    }

    #[test]
    fn match_emails() {
        let buffer =
            "Lorem ipsum <first.last+social@example.com> john@server.department.company.com lorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 2);
        assert_eq!(results.get(0).unwrap().pattern, "email");
        assert_eq!(
            results.get(0).unwrap().text,
            "first.last+social@example.com"
        );
        assert_eq!(results.get(1).unwrap().pattern, "email");
        assert_eq!(
            results.get(1).unwrap().text,
            "john@server.department.company.com"
        );
    }

    #[test]
    fn match_addresses() {
        let buffer = "Lorem 0xfd70b5695 0x5246ddf lorem\n Lorem 0x973113tlorem";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 3);
        assert_eq!(results.get(0).unwrap().text, "0xfd70b5695");
        assert_eq!(results.get(1).unwrap().text, "0x5246ddf");
        assert_eq!(results.get(2).unwrap().text, "0x973113");
    }

    #[test]
    fn match_hex_colors() {
        let buffer = "Lorem #fd7b56 lorem #FF00FF\n Lorem #00fF05 lorem #abcd00 lorem #afRR00";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 4);
        assert_eq!(results.get(0).unwrap().text, "#fd7b56");
        assert_eq!(results.get(1).unwrap().text, "#FF00FF");
        assert_eq!(results.get(2).unwrap().text, "#00fF05");
        assert_eq!(results.get(3).unwrap().text, "#abcd00");
    }

    #[test]
    fn match_ipfs() {
        let buffer = "Lorem QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ lorem Qmfoobar";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
        assert_eq!(
            results.get(0).unwrap().text,
            "QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ"
        );
    }

    #[test]
    fn match_process_port() {
        let buffer = "Lorem 5695 52463 lorem\n Lorem 973113 lorem 99999 lorem 8888 lorem\n 23456 lorem 5432 lorem 23444";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 8);
    }

    #[test]
    fn match_diff_a() {
        let buffer = "Lorem lorem\n--- a/src/main.rs";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
        assert_eq!(results.get(0).unwrap().text, "src/main.rs");
    }

    #[test]
    fn match_diff_b() {
        let buffer = "Lorem lorem\n+++ b/src/main.rs";
        let named_pat = vec![];
        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 1);
        assert_eq!(results.get(0).unwrap().text, "src/main.rs");
    }

    #[test]
    fn priority() {
        let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem";

        let named_pat = vec![];
        let custom: Vec<String> = ["CUSTOM-[0-9]{4,}", "ISSUE-[0-9]{3}"]
            .iter()
            .map(|&s| s.to_string())
            .collect();
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 9);
        assert_eq!(results.get(0).unwrap().text, "http://foo.bar");
        assert_eq!(results.get(1).unwrap().text, "CUSTOM-52463");
        assert_eq!(results.get(2).unwrap().text, "ISSUE-123");
        assert_eq!(results.get(3).unwrap().text, "/var/fd70b569/9999.log");
        assert_eq!(results.get(4).unwrap().text, "52463");
        assert_eq!(results.get(5).unwrap().text, "973113");
        assert_eq!(
            results.get(6).unwrap().text,
            "123e4567-e89b-12d3-a456-426655440000"
        );
        assert_eq!(results.get(7).unwrap().text, "8888");
        assert_eq!(
            results.get(8).unwrap().text,
            "https://crates.io/23456/fd70b569"
        );
    }

    #[test]
    fn named_patterns() {
        use crate::regexes::parse_pattern_name;

        let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem";

        // Restrict the search to the `url` pattern only.
        let named_pat = vec![parse_pattern_name("url").unwrap()];

        let custom = vec![];
        let alphabet = Alphabet("abcd".to_string());
        let results = Model::new(buffer, &alphabet, &named_pat, &custom, false).matches(false);

        assert_eq!(results.len(), 2);
        assert_eq!(results.get(0).unwrap().text, "http://foo.bar");
        assert_eq!(
            results.get(1).unwrap().text,
            "https://crates.io/23456/fd70b569"
        );
    }
}
|