tmux-copyrat/src/textbuf/model.rs

237 lines
8 KiB
Rust
Raw Normal View History

2020-06-01 09:16:56 +02:00
use std::collections;
2020-06-02 20:03:16 +02:00
use regex::Regex;
2020-05-31 22:45:36 +02:00
use sequence_trie::SequenceTrie;
2020-06-02 20:03:16 +02:00
use super::alphabet::Alphabet;
2021-03-20 07:06:33 +01:00
use super::matches::Match;
use super::raw_match::RawMatch;
use super::regexes::{NamedPattern, EXCLUDE_PATTERNS, PATTERNS};
2020-06-02 20:03:16 +02:00
2020-06-01 15:25:54 +02:00
/// Holds data for the `Ui`.
2020-06-01 09:33:40 +02:00
pub struct Model<'a> {
// buffer: &'a str,
pub lines: Vec<&'a str>,
2020-05-27 10:04:42 +02:00
alphabet: &'a Alphabet,
2020-06-04 09:45:58 +02:00
use_all_patterns: bool,
2021-03-13 18:51:31 +01:00
named_patterns: &'a [NamedPattern],
custom_patterns: &'a [String],
2020-05-30 22:23:33 +02:00
pub reverse: bool,
2020-06-02 20:03:16 +02:00
}
2020-06-01 09:33:40 +02:00
impl<'a> Model<'a> {
2020-05-27 10:04:42 +02:00
pub fn new(
buffer: &'a str,
2020-05-27 10:04:42 +02:00
alphabet: &'a Alphabet,
2020-06-04 09:45:58 +02:00
use_all_patterns: bool,
2021-03-13 18:51:31 +01:00
named_patterns: &'a [NamedPattern],
custom_patterns: &'a [String],
2020-05-30 22:23:33 +02:00
reverse: bool,
2020-06-01 09:33:40 +02:00
) -> Model<'a> {
let lines = buffer.split('\n').collect();
2020-06-01 09:33:40 +02:00
Model {
// buffer,
2020-05-27 10:04:42 +02:00
lines,
alphabet,
2020-06-04 09:45:58 +02:00
use_all_patterns,
named_patterns,
2020-06-04 09:45:58 +02:00
custom_patterns,
2020-05-30 22:23:33 +02:00
reverse,
2020-05-27 10:04:42 +02:00
}
2020-06-02 20:03:16 +02:00
}
2020-05-27 10:04:42 +02:00
2020-05-31 22:45:36 +02:00
/// Returns a vector of `Match`es, each corresponding to a pattern match
/// in the lines, its location (x, y), and associated hint.
2020-05-30 22:23:33 +02:00
pub fn matches(&self, unique: bool) -> Vec<Match<'a>> {
2020-05-31 00:17:51 +02:00
let mut raw_matches = self.raw_matches();
if self.reverse {
raw_matches.reverse();
}
let mut matches = self.associate_hints(&raw_matches, unique);
if self.reverse {
matches.reverse();
}
matches
}
2020-06-01 09:33:40 +02:00
/// Internal function that searches the model's lines for pattern matches.
2020-05-31 22:45:36 +02:00
/// Returns a vector of `RawMatch`es (text, location, pattern id) without
/// an associated hint. The hint is attached to `Match`, not to `RawMatch`.
2020-06-01 09:16:56 +02:00
///
/// # Notes
///
/// Custom regexes have priority over other regexes.
///
/// If no named patterns were specified, it will search for all available
/// patterns from the `PATTERNS` catalog.
2020-05-31 00:17:51 +02:00
fn raw_matches(&self) -> Vec<RawMatch<'a>> {
let exclude_regexes = EXCLUDE_PATTERNS
2020-05-27 10:04:42 +02:00
.iter()
2020-05-31 00:17:51 +02:00
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
2020-05-27 10:04:42 +02:00
.collect::<Vec<_>>();
2020-05-31 00:17:51 +02:00
let custom_regexes = self
2020-06-04 09:45:58 +02:00
.custom_patterns
2020-05-27 10:04:42 +02:00
.iter()
2020-05-31 00:17:51 +02:00
.map(|pattern| {
(
"custom",
Regex::new(pattern).expect("Invalid custom regexp"),
)
})
2020-05-27 10:04:42 +02:00
.collect::<Vec<_>>();
2020-06-04 09:45:58 +02:00
let regexes = if self.use_all_patterns {
PATTERNS
.iter()
.map(|&(name, pattern)| (name, Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
} else {
self.named_patterns
.iter()
.map(|NamedPattern(name, pattern)| (name.as_str(), Regex::new(pattern).unwrap()))
.collect::<Vec<(&str, regex::Regex)>>()
};
2020-05-27 10:04:42 +02:00
2020-05-31 00:17:51 +02:00
let all_regexes = [exclude_regexes, custom_regexes, regexes].concat();
2020-05-27 10:04:42 +02:00
2021-03-20 19:47:00 +01:00
let mut raw_matches = Vec::new();
2020-05-27 10:04:42 +02:00
for (index, line) in self.lines.iter().enumerate() {
2021-03-20 19:47:00 +01:00
// Chunk is the remainder of the line to be searched for matches.
2020-05-31 00:17:51 +02:00
// This advances iteratively, until no matches can be found.
2020-05-27 10:04:42 +02:00
let mut chunk: &str = line;
let mut offset: i32 = 0;
2020-05-31 00:17:51 +02:00
// Use all avail regexes to match the chunk and select the match
// occuring the earliest on the chunk. Save its matched text and
2021-03-20 19:47:00 +01:00
// position in a `RawMatch` struct.
2020-05-27 10:04:42 +02:00
loop {
2021-03-20 19:47:00 +01:00
// For each avalable regex, use the `find_iter` iterator to
// get the first non-overlapping match in the chunk, returning
// the start and end byte indices with respect to the chunk.
2020-05-31 00:17:51 +02:00
let chunk_matches = all_regexes
2020-05-27 10:04:42 +02:00
.iter()
2021-03-20 19:47:00 +01:00
.filter_map(|(&ref pat_name, reg)| match reg.find_iter(chunk).next() {
Some(reg_match) => Some((pat_name, reg, reg_match)),
2020-05-27 10:04:42 +02:00
None => None,
})
.collect::<Vec<_>>();
2020-05-31 00:17:51 +02:00
if chunk_matches.is_empty() {
break;
}
2020-06-01 09:16:56 +02:00
// First match on the chunk.
2021-03-20 19:47:00 +01:00
let (pat_name, reg, reg_match) = chunk_matches
2020-05-27 10:04:42 +02:00
.iter()
2021-03-20 19:47:00 +01:00
.min_by_key(|element| element.2.start())
2020-05-31 00:17:51 +02:00
.unwrap();
// Never hint or break ansi color sequences.
2021-03-20 19:47:00 +01:00
if *pat_name != "ansi_colors" {
let text = reg_match.as_str();
// In case the pattern has a capturing group, try obtaining
// that text and start offset, else use the entire match.
let (subtext, substart) = match reg
.captures_iter(text)
.next()
.expect("This regex is guaranteed to match.")
.get(1)
{
Some(capture) => (capture.as_str(), capture.start()),
None => (text, 0),
};
raw_matches.push(RawMatch {
x: offset + reg_match.start() as i32 + substart as i32,
2020-05-31 00:17:51 +02:00
y: index as i32,
2021-03-20 19:47:00 +01:00
pattern: pat_name,
2020-05-31 00:17:51 +02:00
text: subtext,
});
2020-05-27 10:04:42 +02:00
}
2020-05-31 00:17:51 +02:00
2021-03-20 19:47:00 +01:00
chunk = chunk
.get(reg_match.end()..)
.expect("The chunk must be larger than the regex match.");
offset += reg_match.end() as i32;
2020-06-02 20:03:16 +02:00
}
2020-05-27 10:04:42 +02:00
}
2021-03-20 19:47:00 +01:00
raw_matches
2020-05-31 00:17:51 +02:00
}
2020-05-27 10:04:42 +02:00
2020-05-31 00:17:51 +02:00
/// Associate a hint to each `RawMatch`, returning a vector of `Match`es.
///
/// If `unique` is `true`, all duplicate matches will have the same hint.
/// For copying matched text, this seems easier and more natural.
/// If `unique` is `false`, duplicate matches will have their own hint.
2021-03-13 18:51:31 +01:00
fn associate_hints(&self, raw_matches: &[RawMatch<'a>], unique: bool) -> Vec<Match<'a>> {
2020-05-31 00:17:51 +02:00
let hints = self.alphabet.make_hints(raw_matches.len());
let mut hints_iter = hints.iter();
let mut result: Vec<Match<'a>> = vec![];
2020-05-27 10:04:42 +02:00
if unique {
2020-05-31 00:17:51 +02:00
// Map (text, hint)
2020-06-01 09:16:56 +02:00
let mut known: collections::HashMap<&str, &str> = collections::HashMap::new();
2020-05-31 00:17:51 +02:00
for raw_mat in raw_matches {
2021-03-13 18:51:31 +01:00
let hint: &str = known.entry(raw_mat.text).or_insert_with(|| {
2020-05-31 00:17:51 +02:00
hints_iter
.next()
2021-03-13 18:51:31 +01:00
.expect("We should have as many hints as necessary, even invisible ones.")
});
2020-05-31 00:17:51 +02:00
result.push(Match {
x: raw_mat.x,
y: raw_mat.y,
pattern: raw_mat.pattern,
text: raw_mat.text,
hint: hint.to_string(),
});
2020-05-27 10:04:42 +02:00
}
2020-06-02 20:03:16 +02:00
} else {
2020-05-31 00:17:51 +02:00
for raw_mat in raw_matches {
let hint = hints_iter
.next()
.expect("We should have as many hints as necessary, even invisible ones.");
result.push(Match {
x: raw_mat.x,
y: raw_mat.y,
pattern: raw_mat.pattern,
text: raw_mat.text,
hint: hint.to_string(),
});
2020-05-27 10:04:42 +02:00
}
2020-06-02 20:03:16 +02:00
}
2020-05-27 10:04:42 +02:00
2020-05-31 00:17:51 +02:00
result
2020-06-02 20:03:16 +02:00
}
2020-05-31 22:45:36 +02:00
/// Builds a `SequenceTrie` that helps determine if a sequence of keys
/// entered by the user corresponds to a match. This kind of lookup
/// directly returns a reference to the corresponding `Match` if any.
2021-03-13 18:51:31 +01:00
pub fn build_lookup_trie(matches: &'a [Match<'a>]) -> SequenceTrie<char, usize> {
2020-05-31 22:45:36 +02:00
let mut trie = SequenceTrie::new();
for (index, mat) in matches.iter().enumerate() {
let hint_chars = mat.hint.chars().collect::<Vec<char>>();
// no need to insert twice the same hint
if trie.get(&hint_chars).is_none() {
trie.insert_owned(hint_chars, index);
}
}
trie
}
2020-05-27 10:04:42 +02:00
}