/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use icu_casemap::CaseMapperBorrowed; use icu_normalizer::DecomposingNormalizerBorrowed; use icu_properties::props::GeneralCategory; use icu_properties::props::GeneralCategoryGroup; use icu_properties::CodePointMapDataBorrowed; use std::borrow::Cow; use crate::Result; static NFKD: DecomposingNormalizerBorrowed = DecomposingNormalizerBorrowed::new_nfkd(); static GENERAL_CATEGORY: CodePointMapDataBorrowed<'static, GeneralCategory> = icu_properties::CodePointMapData::::new(); static CASE_MAPPER: CaseMapperBorrowed = CaseMapperBorrowed::new(); fn is_combining_mark(c: char) -> bool { GeneralCategoryGroup::Mark.contains(GENERAL_CATEGORY.get(c)) } /// Given a list of keywords for a suggestion, returns a phrase that best /// completes the user's query. This function uses two heuristics to pick the /// best match: /// /// 1. Find the first keyword in the list that has at least one more word than /// the query, then trim the keyword up to the end of that word. /// 2. If there isn't a keyword with more words, pick the keyword that forms the /// longest suffix of the query. This might be the query itself. pub fn full_keyword(query: &str, keywords: &[impl AsRef]) -> String { let query_words_len = query.split_whitespace().count(); let min_phrase_words_len = if query.ends_with(char::is_whitespace) { // If the query ends with a space, find a keyword with at least one more // word, so that the completed phrase can show a word after the space. 
query_words_len + 1 } else { query_words_len }; keywords .iter() .map(AsRef::as_ref) .filter(|phrase| phrase.starts_with(query)) .map(|phrase| phrase.split_whitespace().collect::>()) .find(|phrase_words| phrase_words.len() > min_phrase_words_len) .map(|phrase_words| phrase_words[..min_phrase_words_len].join(" ")) .unwrap_or_else(|| { keywords .iter() .map(AsRef::as_ref) .filter(|phrase| phrase.starts_with(query) && query.len() < phrase.len()) .max_by_key(|phrase| phrase.trim().len()) .unwrap_or(query) .to_owned() }) } /// Performs a depth-first traversal over all possible chunk sequences in a /// slice, applies a filter-map function to each chunk in each sequence, and /// collects the filter-mapped sequences in a `Vec`. /// /// "Chunks" are non-overlapping subslices of the parent slice as described in /// [`slice::chunks()`]. /// /// WARNING: This function potentially does an exponential amount of work! You /// should always be careful to prune the traversal space by returning `None` /// from your mappper function, as described further below, when a chunk does /// not match what you are searching for. /// /// `max_chunk_size` controls the maximum chunk size (in number of words), which /// influences the branching factor at each step in the traversal. /// /// At each traversal step, the filter-map function is called like: /// `f(chunk, chunk_index, is_last_chunk, path)`. /// /// `chunk` is the chunk at that step, `chunk_index` is its index in the parent /// `words` slice, and `is_last_chunk` is true if the chunk is at the end of /// `words`. The function can map the chunk to one or more values. Each value /// expands the branching factor at the current step by `max_chunk_size`. In /// other words, the branching factor at a given traversal step is /// `max_chunk_size` multiplied by the number of values returned by the /// filter-map function at that step. 
`path` is the path of mapped values that /// has been travsersed at that step: a sequence of mapped values corresponding /// to chunks in the parent `words` slice. /// /// The filter-map function can return `None` to halt traversal at the current /// step. Returning `None` sets the branching factor at that step to zero, /// pruning the subtree rooted at that step from the traversal space and /// discarding the path from the output. This is important for keeping traversal /// reasonably bounded. /// /// Traversal ends and the function returns when all paths have been visited. /// The returned `Vec` will contain all traversal paths that weren't pruned. /// /// # Examples /// /// Mapping chunks in `["a", "b", "c"]` to uppercase, up to a max chunk size of /// `3`: /// /// ``` /// # use suggest::util::filter_map_chunks; /// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| { /// Ok(Some(vec![chunk.to_uppercase()])) /// }); /// assert_eq!(paths.unwrap(), vec![ /// vec!["A", "B", "C"], /// vec!["A", "B C"], /// vec!["A B", "C"], /// vec!["A B C"] /// ]); /// ``` /// /// Same as previous but using `chunk_index` in the filter-map function to prune /// paths that don't start with `"a"`: /// /// ``` /// # use suggest::util::filter_map_chunks; /// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, chunk_index, _, _| { /// if chunk_index > 0 || chunk == "a" { /// Ok(Some(vec![chunk.to_uppercase()])) /// } else { /// Ok(None) /// } /// }); /// assert_eq!(paths.unwrap(), vec![ /// vec!["A", "B", "C"], /// vec!["A", "B C"], /// ]); /// ``` /// /// Same as the first example but using `path` in the filter-map function to /// prune paths that include "A B": /// /// ``` /// # use suggest::util::filter_map_chunks; /// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, path| { /// if path.iter().any(|value| value == "A B") { /// Ok(None) /// } else { /// Ok(Some(vec![chunk.to_uppercase()])) /// } /// }); /// assert_eq!(paths.unwrap(), vec![ /// 
vec!["A", "B", "C"], /// vec!["A", "B C"], /// vec!["A B C"], /// ]); /// ``` /// /// Mapping each chunk to multiple values: /// /// ``` /// # use suggest::util::filter_map_chunks; /// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| { /// Ok(Some(vec![format!("{chunk}0"), format!("{chunk}1")])) /// }); /// assert_eq!(paths.unwrap(), vec![ /// vec!["a0", "b0", "c0"], /// vec!["a0", "b0", "c1"], /// vec!["a0", "b1", "c0"], /// vec!["a0", "b1", "c1"], /// vec!["a0", "b c0"], /// vec!["a0", "b c1"], /// vec!["a1", "b0", "c0"], /// vec!["a1", "b0", "c1"], /// vec!["a1", "b1", "c0"], /// vec!["a1", "b1", "c1"], /// vec!["a1", "b c0"], /// vec!["a1", "b c1"], /// vec!["a b0", "c0"], /// vec!["a b0", "c1"], /// vec!["a b1", "c0"], /// vec!["a b1", "c1"], /// vec!["a b c0"], /// vec!["a b c1"] /// ]); /// ``` pub fn filter_map_chunks( words: &[&str], max_chunk_size: usize, f: impl Fn(&str, usize, bool, &[T]) -> Result>>, ) -> Result>> { let normalized_query = words.join(" "); filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f) } /// `remaining_words` is the slice of remaining words in the query string at /// this step. `remaining_query` is the remaining slice of the normalized query /// string at this step. /// /// `path` is the sequence of values returned by the filter-map function so far /// at this step. /// /// `chunk_index` is the word-based index in the query string at this step. fn filter_map_chunks_recurse( remaining_words: &[&str], remaining_query: &str, path: &mut Vec, chunk_index: usize, max_chunk_size: usize, f: &impl Fn(&str, usize, bool, &[T]) -> Result>>, ) -> Result>> { // Filtered-in (non-pruned) paths that will be returned from this step of // the traversal. let mut this_step_paths: Vec> = vec![]; for chunk_size in 1..=max_chunk_size { if remaining_words.len() < chunk_size { // `chunk_size` and the later chunk sizes in this for-loop are too // big to visit the remaining words. 
We already visited them earlier // in the loop when the chunk size was small enough. break; } // Get the current chunk within the remaining query. Its byte length is // the sum of the lengths of the words in the chunk + `chunk_size - 1` // spaces between the words. There will only be one space between each // word in `remaining_query` because `remaining_query` is normalized. let chunk_byte_len = remaining_words[..chunk_size] .iter() .fold(chunk_size - 1, |memo, w| memo + w.len()); let chunk = &remaining_query[..chunk_byte_len]; let is_last_chunk = chunk_size == remaining_words.len(); // Call the mapper function. if let Some(mapped_values) = f(chunk, chunk_index, is_last_chunk, &path[..])? { for value in mapped_values { if is_last_chunk { // This is the last chunk in the path. Stop recursing. this_step_paths.push(vec![value.clone()]); } else { // Recurse. Note that the new `remaining_words` slice won't // be empty because if it were, `chunk_size` would equal the // remaining word count, which is if-branch condition. path.push(value.clone()); let subtree_paths = filter_map_chunks_recurse( &remaining_words[chunk_size..], &remaining_query[(chunk_byte_len + 1)..], path, chunk_index + chunk_size, max_chunk_size, f, )?; path.pop(); for mut p in subtree_paths { p.insert(0, value.clone()); this_step_paths.push(p); } } } } } Ok(this_step_paths) } /// Given a keyword for a suggestion, splits the keyword by the first whitespace /// into the prefix and the suffix. Returns an empty string as the suffix if there /// is no whitespace. pub fn split_keyword(keyword: &str) -> (&str, &str) { keyword.split_once(' ').unwrap_or((keyword, "")) } /// Compares two strings ignoring case, Unicode combining marks, and some /// punctuation. Intended to be used as a Sqlite collating sequence for /// comparing natural language strings like keywords and geoname names. /// /// XXX: Should really be using `icu_collator` for collation! 
pub fn i18n_cmp(a: &str, b: &str) -> std::cmp::Ordering { CASE_MAPPER .fold_string(&i18n_transform(a)) .cmp(&CASE_MAPPER.fold_string(&i18n_transform(b))) } /// Performs the following transforms on the given string: /// /// * Removes Unicode combining marks /// * Removes some punctuation /// * Replaces other punctuation with spaces pub fn i18n_transform(s: &str) -> Cow<'_, str> { // Punctuation to remove. Examples: // // "Washington, D.C." => "Washington DC" // "L'Assomption" => "LAssomption" macro_rules! pattern_remove { () => { '.' | ',' | '\'' | '’' }; } // Punctuation to replace with spaces. Examples: // // "Carmel-by-the-Sea" => "Carmel by the Sea" macro_rules! pattern_replace_with_space { () => { '-' }; } macro_rules! pattern_all { () => { pattern_remove!() | pattern_replace_with_space!() }; } let borrowable = !NFKD .normalize_iter(s.chars()) .any(|c| is_combining_mark(c) || matches!(c, pattern_all!())); if borrowable { // XXX: This borrows the non-NFKD form, which sure looks like a bug, // since NFKD can introduce changes that aren't caught by the check // inside `any()` above. 
Cow::from(s) } else { NFKD.normalize_iter(s.chars()) .filter_map(|c| { if is_combining_mark(c) { // Remove Unicode combining marks: // "Que\u{0301}bec" => "Quebec" None } else { match c { pattern_remove!() => None, pattern_replace_with_space!() => Some(' '), _ => Some(c), } } }) .collect::<_>() } } #[cfg(test)] mod tests { use super::*; use itertools::Itertools; #[test] fn keywords_with_more_words() { assert_eq!( full_keyword( "moz", &[ "moz", "mozi", "mozil", "mozill", "mozilla", "mozilla firefox" ] ), "mozilla".to_owned(), ); assert_eq!( full_keyword( "mozilla", &[ "moz", "mozi", "mozil", "mozill", "mozilla", "mozilla firefox" ] ), "mozilla".to_owned(), ); } #[test] fn keywords_with_longer_phrase() { assert_eq!( full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]), "mozilla".to_owned() ); assert_eq!( full_keyword( "mozilla f", &["moz", "mozi", "mozil", "mozill", "mozilla firefox"] ), "mozilla firefox".to_owned() ); } #[test] fn query_ends_with_space() { assert_eq!( full_keyword( "mozilla ", &["moz", "mozi", "mozil", "mozill", "mozilla firefox"] ), "mozilla firefox".to_owned() ); } fn fmc( query: &str, max_chunk_size: usize, f: impl Fn(&str, usize, bool, &[T]) -> Result>>, ) -> Result>> { let words: Vec<_> = query.split_whitespace().collect(); filter_map_chunks(&words, max_chunk_size, f) } fn check_paths(actual: Vec>, expected: Vec>) { assert_eq!( actual, expected .into_iter() .map(|p| p .into_iter() .map(|(w, i)| (w.to_string(), i)) .collect::>()) .collect::>>() ); } #[test] fn filter_map_chunks_1() -> anyhow::Result<()> { let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| { Ok(Some(vec![(chunk.to_string(), chunk_index)])) })?; check_paths( paths, vec![vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]], ); Ok(()) } #[test] fn filter_map_chunks_2() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| { Ok(Some(vec![(chunk.to_string(), chunk_index)])) })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), 
("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_3() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| { Ok(Some(vec![(chunk.to_string(), chunk_index)])) })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_4() -> anyhow::Result<()> { let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| { Ok(Some(vec![(chunk.to_string(), chunk_index)])) })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a", 0), ("b c d e", 1)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], vec![("a b c d", 0), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_5() -> 
anyhow::Result<()> { let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| { Ok(Some(vec![(chunk.to_string(), chunk_index)])) })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a", 0), ("b c d e", 1)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], vec![("a b c d", 0), ("e", 4)], vec![("a b c d e", 0)], ], ); Ok(()) } #[test] fn filter_map_chunks_1_map_many() -> anyhow::Result<()> { let paths = fmc("a b c", 1, |chunk, _, _, _| { Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect())) })?; assert_eq!( paths, vec![ vec!["a0", "b0", "c0"], vec!["a0", "b0", "c1"], vec!["a0", "b0", "c2"], vec!["a0", "b1", "c0"], vec!["a0", "b1", "c1"], vec!["a0", "b1", "c2"], vec!["a0", "b2", "c0"], vec!["a0", "b2", "c1"], vec!["a0", "b2", "c2"], vec!["a1", "b0", "c0"], vec!["a1", "b0", "c1"], vec!["a1", "b0", "c2"], vec!["a1", "b1", "c0"], vec!["a1", "b1", "c1"], vec!["a1", "b1", "c2"], vec!["a1", "b2", "c0"], vec!["a1", "b2", "c1"], vec!["a1", "b2", "c2"], vec!["a2", "b0", "c0"], vec!["a2", "b0", "c1"], vec!["a2", "b0", "c2"], vec!["a2", "b1", "c0"], vec!["a2", "b1", "c1"], vec!["a2", "b1", "c2"], vec!["a2", "b2", "c0"], vec!["a2", "b2", "c1"], vec!["a2", "b2", "c2"] ] ); Ok(()) } #[test] fn filter_map_chunks_2_map_many() -> anyhow::Result<()> { let paths = fmc("a b c", 2, |chunk, _, _, _| { Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect())) })?; assert_eq!( paths, vec![ vec!["a0", "b0", "c0"], vec!["a0", "b0", "c1"], vec!["a0", "b0", "c2"], vec!["a0", "b1", "c0"], vec!["a0", "b1", "c1"], 
vec!["a0", "b1", "c2"], vec!["a0", "b2", "c0"], vec!["a0", "b2", "c1"], vec!["a0", "b2", "c2"], vec!["a0", "b c0"], vec!["a0", "b c1"], vec!["a0", "b c2"], vec!["a1", "b0", "c0"], vec!["a1", "b0", "c1"], vec!["a1", "b0", "c2"], vec!["a1", "b1", "c0"], vec!["a1", "b1", "c1"], vec!["a1", "b1", "c2"], vec!["a1", "b2", "c0"], vec!["a1", "b2", "c1"], vec!["a1", "b2", "c2"], vec!["a1", "b c0"], vec!["a1", "b c1"], vec!["a1", "b c2"], vec!["a2", "b0", "c0"], vec!["a2", "b0", "c1"], vec!["a2", "b0", "c2"], vec!["a2", "b1", "c0"], vec!["a2", "b1", "c1"], vec!["a2", "b1", "c2"], vec!["a2", "b2", "c0"], vec!["a2", "b2", "c1"], vec!["a2", "b2", "c2"], vec!["a2", "b c0"], vec!["a2", "b c1"], vec!["a2", "b c2"], vec!["a b0", "c0"], vec!["a b0", "c1"], vec!["a b0", "c2"], vec!["a b1", "c0"], vec!["a b1", "c1"], vec!["a b1", "c2"], vec!["a b2", "c0"], vec!["a b2", "c1"], vec!["a b2", "c2"] ] ); Ok(()) } #[test] fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> { let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk { "a" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths(paths, vec![]); Ok(()) } #[test] fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> { let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk { "b" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths(paths, vec![]); Ok(()) } #[test] fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> { let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk { "c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths(paths, vec![]); Ok(()) } #[test] fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> { let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk { "d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths(paths, vec![]); Ok(()) } #[test] fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> { let paths = fmc("a b c d 
e", 1, |chunk, chunk_index, _, _| match chunk { "e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths(paths, vec![]); Ok(()) } #[test] fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "a" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "b" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> { let paths = 
fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "a b" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "b c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "c d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, 
chunk_index, _, _| match chunk { "d e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "a" | "b c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "a" | "c d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "b c" | "c d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> { let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk { "b c" | "d e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 
3), ("e", 4)], vec![("a b", 0), ("c d", 2), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "a" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "b" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), 
})?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "a b" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "b c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b 
c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "c d" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c", 1), ("d e", 3)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c", 2), ("d e", 3)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], vec![("a b c", 0), ("d e", 3)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "d e" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)], vec![("a", 0), ("b", 1), ("c d e", 2)], vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)], vec![("a", 0), ("b c d", 1), ("e", 4)], vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)], vec![("a b", 0), ("c d", 2), ("e", 4)], vec![("a b", 0), ("c d e", 2)], vec![("a b c", 0), ("d", 3), ("e", 4)], ], ); Ok(()) } #[test] fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> { let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk { "a b c" => Ok(None), _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])), })?; check_paths( paths, vec![ vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)], vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)], 
vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
                vec![("a", 0), ("b", 1), ("c d e", 2)],
                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
                vec![("a", 0), ("b c", 1), ("d e", 3)],
                vec![("a", 0), ("b c d", 1), ("e", 4)],
                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
                vec![("a b", 0), ("c", 2), ("d e", 3)],
                vec![("a b", 0), ("c d", 2), ("e", 4)],
                vec![("a b", 0), ("c d e", 2)],
            ],
        );
        Ok(())
    }

    /// Max chunk size 3: returning `None` for `"b c d"` must leave exactly the
    /// paths that do not contain that chunk.
    #[test]
    fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
            if chunk == "b c d" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 3: returning `None` for `"c d e"` must leave exactly the
    /// paths that do not contain that chunk.
    #[test]
    fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
            if chunk == "c d e" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 3: pruning `"a"`, `"b c"` and `"c d e"` together must
    /// leave exactly the paths that contain none of those chunks.
    #[test]
    fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
            if matches!(chunk, "a" | "b c" | "c d e") {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the single-word chunk `"a"` must leave only
    /// paths whose first chunk is longer than one word.
    #[test]
    fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "a" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the single-word chunk `"b"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "b" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the single-word chunk `"c"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "c" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the single-word chunk `"d"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "d" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the single-word chunk `"e"` must leave only
    /// paths whose final chunk is longer than one word.
    #[test]
    fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "e" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d e", 3)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the two-word chunk `"a b"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "a b" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the two-word chunk `"b c"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "b c" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the two-word chunk `"c d"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "c d" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the two-word chunk `"d e"` must leave exactly
    /// the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "d e" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the three-word chunk `"a b c"` must leave
    /// exactly the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "a b c" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the three-word chunk `"b c d"` must leave
    /// exactly the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "b c d" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the three-word chunk `"c d e"` must leave
    /// exactly the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "c d e" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the four-word chunk `"a b c d"` must leave
    /// exactly the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "a b c d" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a", 0), ("b c d e", 1)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning the four-word chunk `"b c d e"` must leave
    /// exactly the paths that do not contain it.
    #[test]
    fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if chunk == "b c d e" {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c d e", 2)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a", 0), ("b c d", 1), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning `"a"`, `"b c"` and `"d e"` together must leave
    /// exactly the paths that contain none of those chunks.
    #[test]
    fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if matches!(chunk, "a" | "b c" | "d e") {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b", 0), ("c d e", 2)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 4: pruning `"a"`, `"b c"` and `"c d e"` together must
    /// leave exactly the paths that contain none of those chunks.
    #[test]
    fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
            if matches!(chunk, "a" | "b c" | "c d e") {
                return Ok(None);
            }
            Ok(Some(vec![(chunk.to_owned(), chunk_index)]))
        })?;
        let expected = vec![
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
            vec![("a b c", 0), ("d", 3), ("e", 4)],
            vec![("a b c", 0), ("d e", 3)],
            vec![("a b c d", 0), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// A query with leading and trailing whitespace, max chunk size 2, and
    /// nothing pruned: the expected chunks carry no surrounding whitespace.
    #[test]
    fn filter_map_chunks_spaces() -> anyhow::Result<()> {
        let actual = fmc(" a b c d e ", 2, |chunk, chunk_index, _, _| {
            let values = vec![(chunk.to_owned(), chunk_index)];
            Ok(Some(values))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
            vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
            vec![("a", 0), ("b c", 1), ("d e", 3)],
            vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
            vec![("a b", 0), ("c", 2), ("d e", 3)],
            vec![("a b", 0), ("c d", 2), ("e", 4)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 1: the second tuple element records the `is_last` flag
    /// as an integer, and only the final chunk is expected to carry 1.
    #[test]
    fn filter_map_chunks_is_last_1() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 1, |chunk, _, is_last, _| {
            let values = vec![(chunk.to_owned(), usize::from(is_last))];
            Ok(Some(values))
        })?;
        let expected = vec![vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)]];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 2: the second tuple element records the `is_last` flag
    /// as an integer, and only the final chunk of each path is expected to
    /// carry 1.
    #[test]
    fn filter_map_chunks_is_last_2() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 2, |chunk, _, is_last, _| {
            let values = vec![(chunk.to_owned(), usize::from(is_last))];
            Ok(Some(values))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
            vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
            vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
            vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
            vec![("a", 0), ("b c", 0), ("d e", 1)],
            vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
            vec![("a b", 0), ("c", 0), ("d e", 1)],
            vec![("a b", 0), ("c d", 0), ("e", 1)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// Max chunk size 3: the second tuple element records the `is_last` flag
    /// as an integer, and only the final chunk of each path is expected to
    /// carry 1.
    #[test]
    fn filter_map_chunks_is_last_3() -> anyhow::Result<()> {
        let actual = fmc("a b c d e", 3, |chunk, _, is_last, _| {
            let values = vec![(chunk.to_owned(), usize::from(is_last))];
            Ok(Some(values))
        })?;
        let expected = vec![
            vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
            vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
            vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
            vec![("a", 0), ("b", 0), ("c d e", 1)],
            vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
            vec![("a", 0), ("b c", 0), ("d e", 1)],
            vec![("a", 0), ("b c d", 0), ("e", 1)],
            vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
            vec![("a b", 0), ("c", 0), ("d e", 1)],
            vec![("a b", 0), ("c d", 0), ("e", 1)],
            vec![("a b", 0), ("c d e", 1)],
            vec![("a b c", 0), ("d", 0), ("e", 1)],
            vec![("a b c", 0), ("d e", 1)],
        ];
        check_paths(actual, expected);
        Ok(())
    }

    /// `split_keyword` must return the first word and the remainder; a
    /// single-word keyword yields an empty remainder.
    #[test]
    fn test_split_keyword() {
        let single_word = split_keyword("foo");
        assert_eq!(single_word, ("foo", ""));

        let three_words = split_keyword("foo bar baz");
        assert_eq!(three_words, ("foo", "bar baz"));
    }

    /// Table-driven check of `super::i18n_transform`: every input must map to
    /// the expected string paired with it.
    #[test]
    fn i18n_transform() -> anyhow::Result<()> {
        // Each case is (input, expected output).
        let cases = [
            ("AbC", "AbC"),
            ("AbC dEf", "AbC dEf"),
            ("Àęí", "Aei"),
            // "Québec" spelled with the single 'é' character
            ("Qu\u{00e9}bec", "Quebec"),
            // "Québec" spelled with ASCII 'e' plus a combining acute accent
            ("Que\u{0301}bec", "Quebec"),
            ("Gößnitz", "Goßnitz"),
            ("St. Louis", "St Louis"),
            ("Washington, D.C.", "Washington DC"),
            ("U.S.A.", "USA"),
            ("Carmel-by-the-Sea", "Carmel by the Sea"),
            ("Val-d'Or", "Val dOr"),
            ("Val-d’Or", "Val dOr"),
            (".,-'()[]?<>", " ()[]?<>"),
        ];
        for (input, want) in cases {
            let got = super::i18n_transform(input);
            assert_eq!(got, want, "Transform test str: {:?}", input);
        }
        Ok(())
    }

    /// Each inner slice is a group of spellings that `super::i18n_cmp` must
    /// treat as equal; every 2-permutation within a group is compared.
    #[test]
    fn i18n_cmp() -> anyhow::Result<()> {
        let groups = [
            ["AbC xYz", "ABC XYZ", "abc xyz"].as_slice(),
            &["Àęí", "Aei", "àęí", "aei"],
            &[
                // "Québec" spelled with the single 'é' character
                "Qu\u{00e9}bec",
                // "Québec" spelled with ASCII 'e' plus a combining acute accent
                "Que\u{0301}bec",
                "Quebec",
                "quebec",
            ],
            &[
                "Gößnitz",
                "Gössnitz",
                "Goßnitz",
                "Gossnitz",
                "gößnitz",
                "gössnitz",
                "goßnitz",
                "gossnitz",
            ],
            &["St. Louis", "St... Louis", "St Louis", "st louis"],
            &[
                "Washington, D.C.",
                "Washington, DC",
                "Washington D.C.",
                "Washington DC",
                "washington dc",
            ],
            &[
                "U.S.A.", "US.A.", "U.SA.", "U.S.A", "USA.", "U.SA", "USA", "usa",
            ],
            &[
                "Val-d'Or",
                "Val-d’Or",
                "Val-dOr",
                "Val d'Or",
                "Val d’Or",
                "Val dOr",
                "val dor",
            ],
            &[
                "Carmel-by-the-Sea",
                "Carmel by the Sea",
                "carmel by the sea",
            ],
            &[".,-'()[]?<>", " ()[]?<>"],
        ];
        for group in groups {
            for pair in group.iter().permutations(2) {
                let ordering = super::i18n_cmp(pair[0], pair[1]);
                assert_eq!(
                    ordering,
                    std::cmp::Ordering::Equal,
                    "Comparing: {:?}",
                    pair
                );
            }
        }
        Ok(())
    }
}