From 4ec240c2b1a125c212e362bb4ff20d544800cebb Mon Sep 17 00:00:00 2001 From: graelo Date: Mon, 22 Mar 2021 08:48:51 +0100 Subject: [PATCH] refactor: model receives buffer & computes matches --- src/lib.rs | 12 +- src/textbuf/mod.rs | 105 ++++++----- src/textbuf/model.rs | 412 +++++++++++++++++++++---------------------- src/ui/vc.rs | 54 +++--- 4 files changed, 304 insertions(+), 279 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d134a09..2a23b59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,8 +10,10 @@ pub mod ui; /// /// Maybe the decision to take ownership of the buffer is a bit bold. pub fn run(buffer: String, opt: &config::basic::Config) -> Option { - let mut model = textbuf::Model::new( - &buffer, + let lines = buffer.split('\n').collect::>(); + + let model = textbuf::Model::new( + &lines, &opt.alphabet, opt.use_all_patterns, &opt.named_patterns, @@ -20,6 +22,10 @@ pub fn run(buffer: String, opt: &config::basic::Config) -> Option opt.unique_hint, ); + if model.matches.is_empty() { + return None; + } + let hint_style = match &opt.hint_style { None => None, Some(style) => match style { @@ -37,7 +43,7 @@ pub fn run(buffer: String, opt: &config::basic::Config) -> Option let selection: Option = { let mut ui = ui::ViewController::new( - &mut model, + &model, opt.focus_wrap_around, default_output_destination, &opt.colors, diff --git a/src/textbuf/mod.rs b/src/textbuf/mod.rs index 77bfe0b..af6abe6 100644 --- a/src/textbuf/mod.rs +++ b/src/textbuf/mod.rs @@ -15,6 +15,7 @@ mod tests { #[test] fn match_reverse() { let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -22,7 +23,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -30,7 +31,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.first().unwrap().hint, "a"); @@ -40,6 +41,7 @@ mod tests { #[test] fn match_unique() { let buffer = "lorem 127.0.0.1 lorem 255.255.255.255 lorem 127.0.0.1 lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -47,7 +49,7 @@ mod tests { let reverse = false; let unique_hint = true; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -55,7 +57,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.first().unwrap().hint, "a"); @@ -65,6 +67,7 @@ mod tests { #[test] fn match_docker() { let buffer = "latest sha256:30557a29d5abc51e5f1d5b472e79b7e296f595abcf19fe6b9199dbbc809c6ff4 20 hours ago"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -72,7 +75,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -80,7 +83,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); assert_eq!( @@ -93,6 +96,7 @@ mod tests { fn match_ansi_colors() { let buffer = "path: /var/log/nginx.log\npath: test/log/nginx-2.log:32folder/.nginx@4df2.log"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -100,7 +104,7 @@ mod tests { let reverse = true; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -108,7 +112,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.get(0).unwrap().text, "/var/log/nginx.log"); @@ -120,6 +124,7 @@ mod tests { fn match_paths() { let buffer = "Lorem /tmp/foo/bar_lol, lorem\n Lorem /var/log/boot-strap.log lorem ../log/kern.log lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -127,7 +132,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -135,7 +140,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.get(0).unwrap().text, "/tmp/foo/bar_lol"); @@ -146,6 +151,7 @@ mod tests { #[test] fn match_home() { let buffer = "Lorem ~/.gnu/.config.txt, lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -153,7 +159,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -161,7 +167,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); assert_eq!(results.get(0).unwrap().text, "~/.gnu/.config.txt"); @@ -170,6 +176,7 @@ mod tests { #[test] fn match_uuids() { let buffer = "Lorem ipsum 123e4567-e89b-12d3-a456-426655440000 lorem\n Lorem lorem lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -177,7 +184,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -185,7 +192,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); } @@ -193,6 +200,7 @@ mod tests { #[test] fn match_shas() { let buffer = "Lorem fd70b5695 5246ddf f924213 lorem\n Lorem 973113963b491874ab2e372ee60d4b4cb75f717c lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -200,7 +208,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -208,7 +216,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 4); assert_eq!(results.get(0).unwrap().text, "fd70b5695"); @@ -223,6 +231,7 @@ mod tests { #[test] fn match_ipv4s() { let buffer = "Lorem ipsum 127.0.0.1 lorem\n Lorem 255.255.10.255 lorem 127.0.0.1 lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -230,7 +239,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -238,7 +247,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.get(0).unwrap().pattern, "ipv4"); @@ -252,6 +261,7 @@ mod tests { #[test] fn match_ipv6s() { let buffer = "Lorem ipsum fe80::2:202:fe4 lorem\n Lorem 2001:67c:670:202:7ba8:5e41:1591:d723 lorem fe80::2:1 lorem ipsum fe80:22:312:fe::1%eth0"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -259,7 +269,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -267,7 +277,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 4); assert_eq!(results.get(0).unwrap().text, "fe80::2:202:fe4"); @@ -283,6 +293,7 @@ mod tests { fn match_markdown_urls() { let buffer = "Lorem ipsum [link](https://github.io?foo=bar) ![](http://cdn.com/img.jpg) lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -290,7 +301,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -298,7 +309,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 2); assert_eq!(results.get(0).unwrap().pattern, "markdown-url"); @@ -310,6 +321,7 @@ mod tests { #[test] fn match_urls() { let buffer = "Lorem ipsum https://www.rust-lang.org/tools lorem\n Lorem ipsumhttps://crates.io lorem https://github.io?foo=bar lorem ssh://github.io"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -317,7 +329,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -325,7 +337,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 4); assert_eq!( @@ -345,6 +357,7 @@ mod tests { fn match_emails() { let buffer = "Lorem ipsum john@server.department.company.com lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -352,7 +365,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -360,7 +373,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 2); assert_eq!(results.get(0).unwrap().pattern, "email"); @@ -378,6 +391,7 @@ mod tests { #[test] fn match_addresses() { let buffer = "Lorem 0xfd70b5695 0x5246ddf lorem\n Lorem 0x973113tlorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -385,7 +399,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -393,7 +407,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 3); assert_eq!(results.get(0).unwrap().pattern, "mem-address"); @@ -407,6 +421,7 @@ mod tests { #[test] fn match_hex_colors() { let buffer = "Lorem #fd7b56 lorem #FF00FF\n Lorem #00fF05 lorem #abcd00 lorem #afRR00"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -414,7 +429,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -422,7 +437,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 4); assert_eq!(results.get(0).unwrap().text, "#fd7b56"); @@ -434,6 +449,7 @@ mod tests { #[test] fn match_ipfs() { let buffer = "Lorem QmRdbNSxDJBXmssAc9fvTtux4duptMvfSGiGuq6yHAQVKQ lorem Qmfoobar"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -441,7 +457,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -449,7 +465,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); assert_eq!( @@ -461,6 +477,7 @@ mod tests { #[test] fn match_process_port() { let buffer = "Lorem 5695 52463 lorem\n Lorem 973113 lorem 99999 lorem 8888 lorem\n 23456 lorem 5432 lorem 23444"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -468,7 +485,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -476,7 +493,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 8); } @@ -484,6 +501,7 @@ mod tests { #[test] fn match_diff_a() { let buffer = "Lorem lorem\n--- a/src/main.rs"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -491,7 +509,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -499,7 +517,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); assert_eq!(results.get(0).unwrap().pattern, "diff-a"); @@ -509,6 +527,7 @@ mod tests { #[test] fn match_diff_b() { let buffer = "Lorem lorem\n+++ b/src/main.rs"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom = vec![]; @@ -516,7 +535,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -524,7 +543,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 1); assert_eq!(results.get(0).unwrap().pattern, "diff-b"); @@ -534,6 +553,7 @@ mod tests { #[test] fn priority_between_regexes() { let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; let custom: Vec = ["CUSTOM-[0-9]{4,}", "ISSUE-[0-9]{3}"] @@ -544,7 +564,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -552,7 +572,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 9); assert_eq!(results.get(0).unwrap().text, "http://foo.bar"); @@ -575,6 +595,7 @@ mod tests { #[test] fn named_patterns() { let buffer = "Lorem [link](http://foo.bar) ipsum CUSTOM-52463 lorem ISSUE-123 lorem\nLorem /var/fd70b569/9999.log 52463 lorem\n Lorem 973113 lorem 123e4567-e89b-12d3-a456-426655440000 lorem 8888 lorem\n https://crates.io/23456/fd70b569 lorem"; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = false; use crate::textbuf::regexes::parse_pattern_name; @@ -585,7 +606,7 @@ mod tests { let reverse = false; let unique_hint = false; let results = Model::new( - buffer, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -593,7 +614,7 @@ mod tests { reverse, unique_hint, ) - .matches(); + .matches; assert_eq!(results.len(), 2); assert_eq!(results.get(0).unwrap().text, "http://foo.bar"); diff --git a/src/textbuf/model.rs b/src/textbuf/model.rs index b0d5b06..9208e35 100644 --- a/src/textbuf/model.rs +++ b/src/textbuf/model.rs @@ -11,18 +11,16 @@ use super::regexes::{NamedPattern, EXCLUDE_PATTERNS, PATTERNS}; /// Holds data for the `Ui`. pub struct Model<'a> { // buffer: &'a str, - pub lines: Vec<&'a str>, - alphabet: &'a Alphabet, - use_all_patterns: bool, - named_patterns: &'a [NamedPattern], - custom_patterns: &'a [String], + pub lines: &'a [&'a str], pub reverse: bool, - unique_hint: bool, + pub matches: Vec>, + pub lookup_trie: SequenceTrie, } impl<'a> Model<'a> { pub fn new( - buffer: &'a str, + // buffer: &'a str, + lines: &'a [&'a str], alphabet: &'a Alphabet, use_all_patterns: bool, named_patterns: &'a [NamedPattern], @@ -30,210 +28,212 @@ impl<'a> Model<'a> { reverse: bool, unique_hint: bool, ) -> Model<'a> { - let lines = buffer.split('\n').collect(); + // let lines = buffer.split('\n').collect::>(); + + let mut raw_matches = + raw_matches(&lines, named_patterns, custom_patterns, use_all_patterns); + + if reverse { + raw_matches.reverse(); + } + + let mut matches = associate_hints(&raw_matches, alphabet, unique_hint); + + if reverse { + matches.reverse(); + } + + let lookup_trie = build_lookup_trie(&matches); Model { // buffer, lines, - alphabet, - use_all_patterns, - named_patterns, - custom_patterns, reverse, - unique_hint, + matches, + lookup_trie, } } - - /// Returns a vector of `Match`es, each corresponding to a pattern match - /// in the lines, its location (x, y), and associated hint. - pub fn matches(&self) -> Vec> { - let mut raw_matches = self.raw_matches(); - - if self.reverse { - raw_matches.reverse(); - } - - let mut matches = self.associate_hints(&raw_matches, self.unique_hint); - - if self.reverse { - matches.reverse(); - } - - matches - } - - /// Internal function that searches the model's lines for pattern matches. - /// Returns a vector of `RawMatch`es (text, location, pattern id) without - /// an associated hint. The hint is attached to `Match`, not to `RawMatch`. - /// - /// # Notes - /// - /// Custom regexes have priority over other regexes. - /// - /// If no named patterns were specified, it will search for all available - /// patterns from the `PATTERNS` catalog. - fn raw_matches(&self) -> Vec> { - let exclude_regexes = EXCLUDE_PATTERNS - .iter() - .map(|&(name, pattern)| (name, Regex::new(pattern).unwrap())) - .collect::>(); - - let custom_regexes = self - .custom_patterns - .iter() - .map(|pattern| { - ( - "custom", - Regex::new(pattern).expect("Invalid custom regexp"), - ) - }) - .collect::>(); - - let regexes = if self.use_all_patterns { - PATTERNS - .iter() - .map(|&(name, pattern)| (name, Regex::new(pattern).unwrap())) - .collect::>() - } else { - self.named_patterns - .iter() - .map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap())) - .collect::>() - }; - - let all_regexes = [exclude_regexes, custom_regexes, regexes].concat(); - - let mut raw_matches = Vec::new(); - - for (index, line) in self.lines.iter().enumerate() { - // Chunk is the remainder of the line to be searched for matches. - // This advances iteratively, until no matches can be found. - let mut chunk: &str = line; - let mut offset: i32 = 0; - - // Use all avail regexes to match the chunk and select the match - // occuring the earliest on the chunk. Save its matched text and - // position in a `RawMatch` struct. - loop { - // For each avalable regex, use the `find_iter` iterator to - // get the first non-overlapping match in the chunk, returning - // the start and end byte indices with respect to the chunk. - let chunk_matches = all_regexes - .iter() - .filter_map(|(&ref pat_name, reg)| match reg.find_iter(chunk).next() { - Some(reg_match) => Some((pat_name, reg, reg_match)), - None => None, - }) - .collect::>(); - - if chunk_matches.is_empty() { - break; - } - - // First match on the chunk. - let (pat_name, reg, reg_match) = chunk_matches - .iter() - .min_by_key(|element| element.2.start()) - .unwrap(); - - // Never hint or break ansi color sequences. - if *pat_name != "ansi_colors" { - let text = reg_match.as_str(); - - // In case the pattern has a capturing group, try obtaining - // that text and start offset, else use the entire match. - let (subtext, substart) = match reg - .captures_iter(text) - .next() - .expect("This regex is guaranteed to match.") - .get(1) - { - Some(capture) => (capture.as_str(), capture.start()), - None => (text, 0), - }; - - raw_matches.push(RawMatch { - x: offset + reg_match.start() as i32 + substart as i32, - y: index as i32, - pattern: pat_name, - text: subtext, - }); - } - - chunk = chunk - .get(reg_match.end()..) - .expect("The chunk must be larger than the regex match."); - offset += reg_match.end() as i32; - } - } - - raw_matches - } - - /// Associate a hint to each `RawMatch`, returning a vector of `Match`es. - /// - /// If `unique` is `true`, all duplicate matches will have the same hint. - /// For copying matched text, this seems easier and more natural. - /// If `unique` is `false`, duplicate matches will have their own hint. - fn associate_hints(&self, raw_matches: &[RawMatch<'a>], unique: bool) -> Vec> { - let hints = self.alphabet.make_hints(raw_matches.len()); - let mut hints_iter = hints.iter(); - - let mut result: Vec> = vec![]; - - if unique { - // Map (text, hint) - let mut known: collections::HashMap<&str, &str> = collections::HashMap::new(); - - for raw_mat in raw_matches { - let hint: &str = known.entry(raw_mat.text).or_insert_with(|| { - hints_iter - .next() - .expect("We should have as many hints as necessary, even invisible ones.") - }); - - result.push(Match { - x: raw_mat.x, - y: raw_mat.y, - pattern: raw_mat.pattern, - text: raw_mat.text, - hint: hint.to_string(), - }); - } - } else { - for raw_mat in raw_matches { - let hint = hints_iter - .next() - .expect("We should have as many hints as necessary, even invisible ones."); - - result.push(Match { - x: raw_mat.x, - y: raw_mat.y, - pattern: raw_mat.pattern, - text: raw_mat.text, - hint: hint.to_string(), - }); - } - } - - result - } - - /// Builds a `SequenceTrie` that helps determine if a sequence of keys - /// entered by the user corresponds to a match. This kind of lookup - /// directly returns a reference to the corresponding `Match` if any. - pub fn build_lookup_trie(matches: &'a [Match<'a>]) -> SequenceTrie { - let mut trie = SequenceTrie::new(); - - for (index, mat) in matches.iter().enumerate() { - let hint_chars = mat.hint.chars().collect::>(); - - // no need to insert twice the same hint - if trie.get(&hint_chars).is_none() { - trie.insert_owned(hint_chars, index); - } - } - - trie - } +} + +/// Internal function that searches the model's lines for pattern matches. +/// Returns a vector of `RawMatch`es (text, location, pattern id) without +/// an associated hint. The hint is attached to `Match`, not to `RawMatch`. +/// +/// # Notes +/// +/// Custom regexes have priority over other regexes. +/// +/// If no named patterns were specified, it will search for all available +/// patterns from the `PATTERNS` catalog. +fn raw_matches<'a>( + lines: &'a [&'a str], + named_patterns: &'a [NamedPattern], + custom_patterns: &'a [String], + use_all_patterns: bool, +) -> Vec> { + let exclude_regexes = EXCLUDE_PATTERNS + .iter() + .map(|&(name, pattern)| (name, Regex::new(pattern).unwrap())) + .collect::>(); + + let custom_regexes = custom_patterns + .iter() + .map(|pattern| { + ( + "custom", + Regex::new(pattern).expect("Invalid custom regexp"), + ) + }) + .collect::>(); + + let regexes = if use_all_patterns { + PATTERNS + .iter() + .map(|&(name, pattern)| (name, Regex::new(pattern).unwrap())) + .collect::>() + } else { + named_patterns + .iter() + .map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap())) + .collect::>() + }; + + let all_regexes = [exclude_regexes, custom_regexes, regexes].concat(); + + let mut raw_matches = Vec::new(); + + for (index, line) in lines.iter().enumerate() { + // Chunk is the remainder of the line to be searched for matches. + // This advances iteratively, until no matches can be found. + let mut chunk: &str = line; + let mut offset: i32 = 0; + + // Use all avail regexes to match the chunk and select the match + // occuring the earliest on the chunk. Save its matched text and + // position in a `RawMatch` struct. + loop { + // For each avalable regex, use the `find_iter` iterator to + // get the first non-overlapping match in the chunk, returning + // the start and end byte indices with respect to the chunk. + let chunk_matches = all_regexes + .iter() + .filter_map(|(&ref pat_name, reg)| match reg.find_iter(chunk).next() { + Some(reg_match) => Some((pat_name, reg, reg_match)), + None => None, + }) + .collect::>(); + + if chunk_matches.is_empty() { + break; + } + + // First match on the chunk. + let (pat_name, reg, reg_match) = chunk_matches + .iter() + .min_by_key(|element| element.2.start()) + .unwrap(); + + // Never hint or break ansi color sequences. + if *pat_name != "ansi_colors" { + let text = reg_match.as_str(); + + // In case the pattern has a capturing group, try obtaining + // that text and start offset, else use the entire match. + let (subtext, substart) = match reg + .captures_iter(text) + .next() + .expect("This regex is guaranteed to match.") + .get(1) + { + Some(capture) => (capture.as_str(), capture.start()), + None => (text, 0), + }; + + raw_matches.push(RawMatch { + x: offset + reg_match.start() as i32 + substart as i32, + y: index as i32, + pattern: pat_name, + text: subtext, + }); + } + + chunk = chunk + .get(reg_match.end()..) + .expect("The chunk must be larger than the regex match."); + offset += reg_match.end() as i32; + } + } + + raw_matches +} + +/// Associate a hint to each `RawMatch`, returning a vector of `Match`es. +/// +/// If `unique` is `true`, all duplicate matches will have the same hint. +/// For copying matched text, this seems easier and more natural. +/// If `unique` is `false`, duplicate matches will have their own hint. +fn associate_hints<'a>( + raw_matches: &[RawMatch<'a>], + alphabet: &'a Alphabet, + unique: bool, +) -> Vec> { + let hints = alphabet.make_hints(raw_matches.len()); + let mut hints_iter = hints.iter(); + + let mut result: Vec> = vec![]; + + if unique { + // Map (text, hint) + let mut known: collections::HashMap<&str, &str> = collections::HashMap::new(); + + for raw_mat in raw_matches { + let hint: &str = known.entry(raw_mat.text).or_insert_with(|| { + hints_iter + .next() + .expect("We should have as many hints as necessary, even invisible ones.") + }); + + result.push(Match { + x: raw_mat.x, + y: raw_mat.y, + pattern: raw_mat.pattern, + text: raw_mat.text, + hint: hint.to_string(), + }); + } + } else { + for raw_mat in raw_matches { + let hint = hints_iter + .next() + .expect("We should have as many hints as necessary, even invisible ones."); + + result.push(Match { + x: raw_mat.x, + y: raw_mat.y, + pattern: raw_mat.pattern, + text: raw_mat.text, + hint: hint.to_string(), + }); + } + } + + result +} + +/// Builds a `SequenceTrie` that helps determine if a sequence of keys +/// entered by the user corresponds to a match. This kind of lookup +/// directly returns a reference to the corresponding `Match` if any. +fn build_lookup_trie<'a>(matches: &'a [Match<'a>]) -> SequenceTrie { + let mut trie = SequenceTrie::new(); + + for (index, mat) in matches.iter().enumerate() { + let hint_chars = mat.hint.chars().collect::>(); + + // no need to insert twice the same hint + if trie.get(&hint_chars).is_none() { + trie.insert_owned(hint_chars, index); + } + } + + trie } diff --git a/src/ui/vc.rs b/src/ui/vc.rs index 255d594..6dcfc24 100644 --- a/src/ui/vc.rs +++ b/src/ui/vc.rs @@ -2,7 +2,6 @@ use std::char; use std::cmp; use std::io; -use sequence_trie::SequenceTrie; use termion::{self, color, cursor, event, style}; use super::colors::UiColors; @@ -11,11 +10,9 @@ use super::{HintAlignment, HintStyle}; use crate::{config::extended::OutputDestination, textbuf}; pub struct ViewController<'a> { - model: &'a mut textbuf::Model<'a>, + model: &'a textbuf::Model<'a>, term_width: u16, line_offsets: Vec, - matches: Vec>, - lookup_trie: SequenceTrie, focus_index: usize, focus_wrap_around: bool, default_output_destination: OutputDestination, @@ -28,16 +25,18 @@ impl<'a> ViewController<'a> { // Initialize {{{1 pub fn new( - model: &'a mut textbuf::Model<'a>, + model: &'a textbuf::Model<'a>, focus_wrap_around: bool, default_output_destination: OutputDestination, rendering_colors: &'a UiColors, hint_alignment: &'a HintAlignment, hint_style: Option, ) -> ViewController<'a> { - let matches = model.matches(); - let lookup_trie = textbuf::Model::build_lookup_trie(&matches); - let focus_index = if model.reverse { matches.len() - 1 } else { 0 }; + let focus_index = if model.reverse { + model.matches.len() - 1 + } else { + 0 + }; let (term_width, _) = termion::terminal_size().unwrap_or((80u16, 30u16)); // .expect("Cannot read the terminal size."); let line_offsets = get_line_offsets(&model.lines, term_width); @@ -46,8 +45,6 @@ impl<'a> ViewController<'a> { model, term_width, line_offsets, - matches, - lookup_trie, focus_index, focus_wrap_around, default_output_destination, @@ -106,7 +103,7 @@ impl<'a> ViewController<'a> { let old_index = self.focus_index; if self.focus_wrap_around { if self.focus_index == 0 { - self.focus_index = self.matches.len() - 1; + self.focus_index = self.model.matches.len() - 1; } else { self.focus_index -= 1; } @@ -122,12 +119,12 @@ impl<'a> ViewController<'a> { fn next_focus_index(&mut self) -> (usize, usize) { let old_index = self.focus_index; if self.focus_wrap_around { - if self.focus_index == self.matches.len() - 1 { + if self.focus_index == self.model.matches.len() - 1 { self.focus_index = 0; } else { self.focus_index += 1; } - } else if self.focus_index < self.matches.len() - 1 { + } else if self.focus_index < self.model.matches.len() - 1 { self.focus_index += 1; } let new_index = self.focus_index; @@ -379,7 +376,7 @@ impl<'a> ViewController<'a> { &self.rendering_colors, ); - for (index, mat) in self.matches.iter().enumerate() { + for (index, mat) in self.model.matches.iter().enumerate() { let focused = index == self.focus_index; self.render_match(stdout, mat, focused); } @@ -396,12 +393,12 @@ impl<'a> ViewController<'a> { new_focus_index: usize, ) { // Render the previously focused match as non-focused - let mat = self.matches.get(old_focus_index).unwrap(); + let mat = self.model.matches.get(old_focus_index).unwrap(); let focused = false; self.render_match(stdout, mat, focused); // Render the previously focused match as non-focused - let mat = self.matches.get(new_focus_index).unwrap(); + let mat = self.model.matches.get(new_focus_index).unwrap(); let focused = true; self.render_match(stdout, mat, focused); @@ -420,7 +417,7 @@ impl<'a> ViewController<'a> { fn listen(&mut self, reader: &mut dyn io::Read, writer: &mut dyn io::Write) -> Event { use termion::input::TermRead; // Trait for `reader.keys().next()`. - if self.matches.is_empty() { + if self.model.matches.is_empty() { return Event::Exit; } @@ -487,7 +484,7 @@ impl<'a> ViewController<'a> { // Yank/copy event::Key::Char(_ch @ 'y') | event::Key::Char(_ch @ '\n') => { - let text = self.matches.get(self.focus_index).unwrap().text; + let text = self.model.matches.get(self.focus_index).unwrap().text; return Event::Match(Selection { text: text.to_string(), uppercased: false, @@ -495,7 +492,7 @@ impl<'a> ViewController<'a> { }); } event::Key::Char(_ch @ 'Y') => { - let text = self.matches.get(self.focus_index).unwrap().text; + let text = self.model.matches.get(self.focus_index).unwrap().text; return Event::Match(Selection { text: text.to_string(), uppercased: true, @@ -526,6 +523,7 @@ impl<'a> ViewController<'a> { typed_hint.push_str(&lower_key); let node = self + .model .lookup_trie .get_node(&typed_hint.chars().collect::>()); @@ -540,7 +538,7 @@ impl<'a> ViewController<'a> { let match_index = node.value().expect( "By construction, the Lookup Trie should have a value for each leaf.", ); - let mat = self.matches.get(*match_index).expect("By construction, the value in a leaf should correspond to an existing hint."); + let mat = self.model.matches.get(*match_index).expect("By construction, the value in a leaf should correspond to an existing hint."); let text = mat.text.to_string(); return Event::Match(Selection { text, @@ -880,18 +878,19 @@ path: /usr/local/bin/cargo"; #[test] /// Simulates rendering without any match. fn test_render_full_without_matches() { - let content = "lorem 127.0.0.1 lorem + let buffer = "lorem 127.0.0.1 lorem Barcelona https://en.wikipedia.org/wiki/Barcelona - "; + let lines = buffer.split('\n').collect::>(); - let use_all_patterns = true; + let use_all_patterns = false; let named_pat = vec![]; let custom_patterns = vec![]; let alphabet = alphabet::Alphabet("abcd".to_string()); let reverse = false; let unique_hint = false; let mut model = textbuf::Model::new( - content, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -918,8 +917,6 @@ Barcelona https://en.wikipedia.org/wiki/Barcelona - "; model: &mut model, term_width, line_offsets, - matches: vec![], // no matches - lookup_trie: SequenceTrie::new(), focus_index: 0, focus_wrap_around: false, default_output_destination: OutputDestination::Tmux, @@ -957,9 +954,10 @@ Barcelona https://en.wikipedia.org/wiki/Barcelona - "; #[test] /// Simulates rendering with matches. fn test_render_full_with_matches() { - let content = "lorem 127.0.0.1 lorem + let buffer = "lorem 127.0.0.1 lorem Barcelona https://en.wikipedia.org/wiki/Barcelona - "; + let lines = buffer.split('\n').collect::>(); let use_all_patterns = true; let named_pat = vec![]; @@ -968,7 +966,7 @@ Barcelona https://en.wikipedia.org/wiki/Barcelona - "; let reverse = true; let unique_hint = false; let mut model = textbuf::Model::new( - content, + &lines, &alphabet, use_all_patterns, &named_pat, @@ -1092,7 +1090,7 @@ Barcelona https://en.wikipedia.org/wiki/Barcelona - "; // .find(|(_idx, (&l, &r))| l != r); // println!("{:?}", diff_point); - assert_eq!(2, ui.matches.len()); + assert_eq!(2, ui.model.matches.len()); assert_eq!(writer, expected.as_bytes()); }