/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use rusqlite::types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef}; use rusqlite::{named_params, Result as RusqliteResult, ToSql}; use sql_support::ConnExt; use url::form_urlencoded; use crate::{ db::SuggestDao, provider::SuggestionProvider, rs::{DownloadedYelpSuggestion, SuggestRecordId}, suggestion::Suggestion, suggestion::YelpSubjectType, Result, SuggestionQuery, }; #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] #[repr(u8)] enum Modifier { Pre = 0, Post = 1, Yelp = 2, LocationSign = 3, } impl ToSql for Modifier { fn to_sql(&self) -> RusqliteResult> { Ok(ToSqlOutput::from(*self as u8)) } } impl ToSql for YelpSubjectType { fn to_sql(&self) -> RusqliteResult> { Ok(ToSqlOutput::from(*self as u8)) } } impl FromSql for YelpSubjectType { fn column_result(value: ValueRef<'_>) -> FromSqlResult { if value.as_i64().unwrap_or_default() == 0 { Ok(YelpSubjectType::Service) } else { Ok(YelpSubjectType::Business) } } } #[derive(Clone, Copy, Eq, PartialEq)] enum FindFrom { First, Last, } /// This module assumes like following query. /// "Yelp-modifier? Pre-modifier? Subject Post-modifier? (Location-modifier | Location-sign Location?)? Yelp-modifier?" /// For example, the query below is valid. /// "Yelp (Yelp-modifier) Best(Pre-modifier) Ramen(Subject) Delivery(Post-modifier) In(Location-sign) Tokyo(Location)" /// Also, as everything except Subject is optional, "Ramen" will be also valid query. /// However, "Best Best Ramen" and "Ramen Best" is out of the above appearance order rule, /// parsing will be failed. Also, every words except Location needs to be registered in DB. /// Please refer to the query test in store.rs for all of combination. /// Currently, the maximum query length is determined while referring to having word lengths in DB /// and location names. /// max subject: 50 + pre-modifier: 10 + post-modifier: 10 + location-sign: 7 + location: 50 = 127 = 150. const MAX_QUERY_LENGTH: usize = 150; /// The max number of words consisting the modifier. To improve the SQL performance by matching with /// "keyword=:modifier" (please see is_modifier()), define this how many words we should check. const MAX_MODIFIER_WORDS_NUMBER: usize = 2; /// At least this many characters must be typed for a subject to be matched. const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2; #[derive(Debug, PartialEq)] struct FindSubjectData<'a> { // The keyword in DB (but the case is inherited by query). subject: String, // Whether or not the keyword is exact match. exact_match: bool, // The subject type. subject_type: YelpSubjectType, // Words after removed matching subject. rest: &'a [&'a str], } impl SuggestDao<'_> { /// Inserts the suggestions for Yelp attachment into the database. pub(crate) fn insert_yelp_suggestions( &mut self, record_id: &SuggestRecordId, suggestion: &DownloadedYelpSuggestion, ) -> Result<()> { for keyword in &suggestion.subjects { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)", named_params! { ":record_id": record_id.as_str(), ":keyword": keyword, ":subject_type": YelpSubjectType::Service, }, )?; } for keyword in suggestion.business_subjects.as_ref().unwrap_or(&vec![]) { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)", named_params! { ":record_id": record_id.as_str(), ":keyword": keyword, ":subject_type": YelpSubjectType::Business, }, )?; } for keyword in &suggestion.pre_modifiers { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", named_params! { ":record_id": record_id.as_str(), ":type": Modifier::Pre, ":keyword": keyword, }, )?; } for keyword in &suggestion.post_modifiers { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", named_params! { ":record_id": record_id.as_str(), ":type": Modifier::Post, ":keyword": keyword, }, )?; } for keyword in &suggestion.yelp_modifiers { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", named_params! { ":record_id": record_id.as_str(), ":type": Modifier::Yelp, ":keyword": keyword, }, )?; } for keyword in &suggestion.location_signs { self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", named_params! { ":record_id": record_id.as_str(), ":type": Modifier::LocationSign, ":keyword": keyword, }, )?; } self.scope.err_if_interrupted()?; self.conn.execute_cached( "INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)", named_params! { ":record_id": record_id.as_str(), ":icon_id": suggestion.icon_id, ":score": suggestion.score, }, )?; Ok(()) } /// Fetch Yelp suggestion from given user's query. pub(crate) fn fetch_yelp_suggestions( &self, query: &SuggestionQuery, ) -> Result> { if !query.providers.contains(&SuggestionProvider::Yelp) { return Ok(vec![]); } if query.keyword.len() > MAX_QUERY_LENGTH { return Ok(vec![]); } let query_vec: Vec<_> = query.keyword.split_whitespace().collect(); let mut query_words: &[&str] = &query_vec; let pre_yelp_modifier_tuple = self.find_modifier(query_words, Modifier::Yelp, FindFrom::First)?; if let Some((_, rest)) = pre_yelp_modifier_tuple { query_words = rest; } let pre_modifier_tuple = self.find_modifier(query_words, Modifier::Pre, FindFrom::First)?; if let Some((_, rest)) = pre_modifier_tuple { query_words = rest; } let Some(subject_data) = self.find_subject(query_words)? else { return Ok(vec![]); }; query_words = subject_data.rest; let post_modifier_tuple = self.find_modifier(query_words, Modifier::Post, FindFrom::First)?; if let Some((_, rest)) = post_modifier_tuple { query_words = rest; } let location_sign_tuple = self.find_modifier(query_words, Modifier::LocationSign, FindFrom::First)?; if let Some((_, rest)) = location_sign_tuple { query_words = rest; } let post_yelp_modifier_tuple = self.find_modifier(query_words, Modifier::Yelp, FindFrom::Last)?; if let Some((_, rest)) = post_yelp_modifier_tuple { query_words = rest; } let location = if query_words.is_empty() { None } else { Some(query_words.join(" ")) }; let (icon, icon_mimetype, score) = self.fetch_custom_details()?; let builder = SuggestionBuilder { subject: &subject_data.subject, subject_exact_match: subject_data.exact_match, subject_type: subject_data.subject_type, pre_modifier: pre_modifier_tuple.map(|(words, _)| words.to_string()), post_modifier: post_modifier_tuple.map(|(words, _)| words.to_string()), location_sign: location_sign_tuple.map(|(words, _)| words.to_string()), location, icon, icon_mimetype, score, }; Ok(vec![builder.into()]) } /// Find the modifier for given query and modifier type. /// Find from last word, if set FindFrom::Last to find_from. /// It returns Option as follows: /// ( /// String: The keyword in DB (but the case is inherited by query). /// &[&str]: Words after removed matching modifier. /// ) fn find_modifier<'a>( &self, query_words: &'a [&'a str], modifier_type: Modifier, find_from: FindFrom, ) -> Result> { if query_words.is_empty() { return Ok(None); } for n in (1..=std::cmp::min(MAX_MODIFIER_WORDS_NUMBER, query_words.len())).rev() { let Some((candidate_chunk, rest)) = (match find_from { FindFrom::First => query_words.split_at_checked(n), FindFrom::Last => query_words .split_at_checked(query_words.len() - n) .map(|(front, back)| (back, front)), }) else { continue; }; let mut candidate = candidate_chunk.join(" "); if let Some(keyword_lowercase) = self.conn.try_query_one::( if n == query_words.len() { " SELECT keyword FROM yelp_modifiers WHERE type = :type AND keyword BETWEEN :word AND :word || x'FFFF' LIMIT 1 " } else { " SELECT keyword FROM yelp_modifiers WHERE type = :type AND keyword = :word LIMIT 1 " }, named_params! { ":type": modifier_type, ":word": candidate.to_lowercase(), }, true, )? { // Preserve the query as the user typed it including its case. candidate.push_str(keyword_lowercase.get(candidate.len()..).unwrap_or_default()); return Ok(Some((candidate, rest))); } } Ok(None) } /// Find the subject for given query. /// It returns Option. fn find_subject<'a>(&self, query_words: &'a [&'a str]) -> Result>> { if query_words.is_empty() { return Ok(None); } let mut query_string = query_words.join(" "); // This checks if keyword is a substring of the query. if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable( "SELECT keyword, subject_type FROM yelp_subjects WHERE :query BETWEEN keyword AND keyword || ' ' || x'FFFF' ORDER BY LENGTH(keyword) ASC, keyword ASC LIMIT 1", named_params! { ":query": query_string.to_lowercase(), }, |row| -> Result<_> { Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?)) }, true, ) { // Preserve the query as the user typed it including its case. return Ok(query_string.get(0..keyword_lowercase.len()).map(|keyword| { let count = keyword.split_whitespace().count(); FindSubjectData { subject: keyword.to_string(), exact_match: true, subject_type, rest: query_words.get(count..).unwrap_or_default(), } })); }; if query_string.len() < SUBJECT_PREFIX_MATCH_THRESHOLD { return Ok(None); } // Oppositely, this checks if the query is a substring of keyword. if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable( "SELECT keyword, subject_type FROM yelp_subjects WHERE keyword BETWEEN :query AND :query || x'FFFF' ORDER BY LENGTH(keyword) ASC, keyword ASC LIMIT 1", named_params! { ":query": query_string.to_lowercase(), }, |row| -> Result<_> { Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?)) }, true, ) { // Preserve the query as the user typed it including its case. return Ok(keyword_lowercase .get(query_string.len()..) .map(|keyword_rest| { query_string.push_str(keyword_rest); let count = std::cmp::min(query_words.len(), query_string.split_whitespace().count()); FindSubjectData { subject: query_string, exact_match: false, subject_type, rest: query_words.get(count..).unwrap_or_default(), } })); }; Ok(None) } /// Fetch the custom details for Yelp suggestions. /// It returns the location tuple as follows: /// ( /// Option>: Icon data. If not found, returns None. /// Option: Mimetype of the icon data. If not found, returns None. /// f64: Reflects score field in the yelp_custom_details table. /// ) /// /// Note that there should be only one record in `yelp_custom_details` /// as all the Yelp assets are stored in the attachment of a single record /// on Remote Settings. The following query will perform a table scan against /// `yelp_custom_details` followed by an index search against `icons`, /// which should be fine since there is only one record in the first table. fn fetch_custom_details(&self) -> Result<(Option>, Option, f64)> { let result = self.conn.query_row_and_then_cachable( r#" SELECT i.data, i.mimetype, y.score FROM yelp_custom_details y LEFT JOIN icons i ON y.icon_id = i.id LIMIT 1 "#, (), |row| -> Result<_> { Ok(( row.get::<_, Option>>(0)?, row.get::<_, Option>(1)?, row.get::<_, f64>(2)?, )) }, true, )?; Ok(result) } } struct SuggestionBuilder<'a> { subject: &'a str, subject_exact_match: bool, subject_type: YelpSubjectType, pre_modifier: Option, post_modifier: Option, location_sign: Option, location: Option, icon: Option>, icon_mimetype: Option, score: f64, } impl<'a> From> for Suggestion { fn from(builder: SuggestionBuilder<'a>) -> Suggestion { let description = [ builder.pre_modifier.as_deref(), Some(builder.subject), builder.post_modifier.as_deref(), ] .iter() .flatten() .copied() .collect::>() .join(" "); // https://www.yelp.com/search?find_desc={description}&find_loc={location} let mut url = String::from("https://www.yelp.com/search?"); let mut parameters = form_urlencoded::Serializer::new(String::new()); parameters.append_pair("find_desc", &description); if let Some(location) = &builder.location { parameters.append_pair("find_loc", location); } url.push_str(¶meters.finish()); let title = [ builder.pre_modifier.as_deref(), Some(builder.subject), builder.post_modifier.as_deref(), builder.location_sign.as_deref(), builder.location.as_deref(), ] .iter() .flatten() .copied() .collect::>() .join(" "); Suggestion::Yelp { url, title, icon: builder.icon, icon_mimetype: builder.icon_mimetype, score: builder.score, has_location_sign: builder.location_sign.is_some(), subject_exact_match: builder.subject_exact_match, subject_type: builder.subject_type, location_param: "find_loc".to_string(), } } } #[cfg(test)] mod tests { use super::*; use crate::{store::tests::TestStore, testing::*, SuggestIngestionConstraints}; #[test] fn yelp_functions() -> anyhow::Result<()> { before_each(); let store = TestStore::new( MockRemoteSettingsClient::default() .with_record(SuggestionProvider::Yelp.record("data-4", json!([ramen_yelp()]))) .with_record(SuggestionProvider::Yelp.icon(yelp_favicon())), ); store.ingest(SuggestIngestionConstraints { providers: Some(vec![SuggestionProvider::Yelp]), ..SuggestIngestionConstraints::all_providers() }); store.read(|dao| { type FindModifierTestCase<'a> = (&'a str, Modifier, FindFrom, Option<(String, &'a [&'a str])>); let find_modifer_tests: &[FindModifierTestCase] = &[ // Query, Modifier, FindFrom, Expected result. ("", Modifier::Pre, FindFrom::First, None), ("", Modifier::Post, FindFrom::First, None), ("", Modifier::Yelp, FindFrom::First, None), // Single word modifier. ( "b", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &[])), ), ( "be", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &[])), ), ( "bes", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &[])), ), ( "best", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &[])), ), ( "best ", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &[])), ), ( "best r", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &["r"])), ), ( "best ramen", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &["ramen"])), ), ( "best spicy ramen", Modifier::Pre, FindFrom::First, Some(("best".to_string(), &["spicy", "ramen"])), ), ( "delivery", Modifier::Post, FindFrom::First, Some(("delivery".to_string(), &[])), ), ( "yelp", Modifier::Yelp, FindFrom::First, Some(("yelp".to_string(), &[])), ), ( "same_modifier", Modifier::Pre, FindFrom::First, Some(("same_modifier".to_string(), &[])), ), ( "same_modifier", Modifier::Post, FindFrom::First, Some(("same_modifier".to_string(), &[])), ), ("same_modifier", Modifier::Yelp, FindFrom::First, None), // Multiple word modifier. ( "s", Modifier::Pre, FindFrom::First, Some(("same_modifier".to_string(), &[])), ), ( "su", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super b", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super be", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super bes", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super best", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &[])), ), ( "super best ramen", Modifier::Pre, FindFrom::First, Some(("super best".to_string(), &["ramen"])), ), ( "super delivery", Modifier::Post, FindFrom::First, Some(("super delivery".to_string(), &[])), ), ( "yelp keyword", Modifier::Yelp, FindFrom::First, Some(("yelp keyword".to_string(), &[])), ), // Different modifier or findfrom. ("best ramen", Modifier::Post, FindFrom::First, None), ("best ramen", Modifier::Yelp, FindFrom::First, None), ("best ramen", Modifier::Pre, FindFrom::Last, None), ( "ramen best", Modifier::Pre, FindFrom::Last, Some(("best".to_string(), &["ramen"])), ), // Keywords similar to modifire. ("bestabc", Modifier::Post, FindFrom::First, None), ("bestabc ramen", Modifier::Post, FindFrom::First, None), // Keep chars case. ( "BeSt SpIcY rAmEn", Modifier::Pre, FindFrom::First, Some(("BeSt".to_string(), &["SpIcY", "rAmEn"])), ), ( "SpIcY rAmEn DeLiVeRy", Modifier::Post, FindFrom::Last, Some(("DeLiVeRy".to_string(), &["SpIcY", "rAmEn"])), ), // Prefix match is available only for last words. ("be ramen", Modifier::Pre, FindFrom::First, None), ("bes ramen", Modifier::Pre, FindFrom::First, None), ]; for (query, modifier, findfrom, expected) in find_modifer_tests { assert_eq!( dao.find_modifier( &query.split_whitespace().collect::>(), *modifier, *findfrom )?, *expected ); } type FindSubjectTestCase<'a> = (&'a str, Option>); let find_subject_tests: &[FindSubjectTestCase] = &[ // Query, Expected result. ("", None), ("r", None), ( "ra", Some(FindSubjectData { subject: "rats".to_string(), exact_match: false, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "ram", Some(FindSubjectData { subject: "ramen".to_string(), exact_match: false, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "rame", Some(FindSubjectData { subject: "ramen".to_string(), exact_match: false, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "ramen", Some(FindSubjectData { subject: "ramen".to_string(), exact_match: true, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "spi", Some(FindSubjectData { subject: "spicy ramen".to_string(), exact_match: false, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "spicy ra ", Some(FindSubjectData { subject: "spicy ramen".to_string(), exact_match: false, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "spicy ramen", Some(FindSubjectData { subject: "spicy ramen".to_string(), exact_match: true, subject_type: YelpSubjectType::Service, rest: &[], }), ), ( "spicy ramen gogo", Some(FindSubjectData { subject: "spicy ramen".to_string(), exact_match: true, subject_type: YelpSubjectType::Service, rest: &["gogo"], }), ), ( "SpIcY rAmEn GoGo", Some(FindSubjectData { subject: "SpIcY rAmEn".to_string(), exact_match: true, subject_type: YelpSubjectType::Service, rest: &["GoGo"], }), ), ("ramenabc", None), ("ramenabc xyz", None), ("spicy ramenabc", None), ("spicy ramenabc xyz", None), ( "ramen abc", Some(FindSubjectData { subject: "ramen".to_string(), exact_match: true, subject_type: YelpSubjectType::Service, rest: &["abc"], }), ), ( "the shop", Some(FindSubjectData { subject: "the shop".to_string(), exact_match: true, subject_type: YelpSubjectType::Business, rest: &[], }), ), ]; for (query, expected) in find_subject_tests { assert_eq!( dao.find_subject(&query.split_whitespace().collect::>())?, *expected ); } type FindLocationSignTestCase<'a> = (&'a str, Option<(String, &'a [&'a str])>); let find_location_sign_tests: &[FindLocationSignTestCase] = &[ // Query, Expected result. ("", None), ("n", Some(("near".to_string(), &[]))), ("ne", Some(("near".to_string(), &[]))), ("nea", Some(("near".to_string(), &[]))), ("near", Some(("near".to_string(), &[]))), ("near ", Some(("near".to_string(), &[]))), ("near b", Some(("near by".to_string(), &[]))), ("near by", Some(("near by".to_string(), &[]))), ("near by a", Some(("near by".to_string(), &["a"]))), // Prefix match is available only for last words. ("nea r", None), ]; for (query, expected) in find_location_sign_tests { assert_eq!( dao.find_modifier( &query.split_whitespace().collect::>(), Modifier::LocationSign, FindFrom::First )?, *expected ); } Ok(()) })?; Ok(()) } }