/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /// Fakespot-specific logic /// /// Score used to order Fakespot suggestions /// /// FakespotScore contains several components, each in the range of [0, 1] pub struct FakespotScore { /// Did the query match the `keywords` field exactly? keywords_score: f64, /// How well did the query match the `product_type` field? product_type_score: f64, /// Fakespot score from the RS data, this reflects the average review, number of reviews, /// Fakespot grade, etc. fakespot_score: f64, } impl FakespotScore { pub fn new(query: &str, keywords: String, product_type: String, fakespot_score: f64) -> Self { let query = query.to_lowercase(); let query_terms = split_terms(&query); Self { keywords_score: calc_keywords_score(&query_terms, &keywords), product_type_score: calc_product_type_score(&query_terms, &product_type), fakespot_score, } } /// Convert a FakespotScore into the value to use in `Sugggestion::Fakespot::score` /// /// This converts FakespotScore into a single float that: /// - Is > 0.3 so that Fakespot suggestions are preferred to AMP ones /// - Reflects the Fakespot ordering: /// - Suggestions with higher keywords_score are greater /// - If keywords_score is tied, then suggestions with higher product_type_scores are greater /// - If both are tied, then suggestions with higher fakespot_score are greater pub fn as_suggest_score(&self) -> f64 { 0.30 + (0.01 * self.keywords_score) + (0.001 * self.product_type_score) + (0.0001 * self.fakespot_score) } } /// Split a string containing terms into a list of individual terms, normalized to lowercase fn split_terms(string: &str) -> Vec<&str> { string.split_whitespace().collect() } fn calc_keywords_score(query_terms: &[&str], keywords: &str) -> f64 { // Note: We can assume keywords is lower-case, since we do that during ingestion let keyword_terms = split_terms(keywords); if keyword_terms.is_empty() { return 0.0; } if query_terms == keyword_terms { 1.0 } else { 0.0 } } fn calc_product_type_score(query_terms: &[&str], product_type: &str) -> f64 { // Note: We can assume product_type is lower-case, since we do that during ingestion let product_type_terms = split_terms(product_type); if product_type_terms.is_empty() { return 0.0; } let count = product_type_terms .iter() .filter(|t| query_terms.contains(t)) .count() as f64; count / product_type_terms.len() as f64 } #[cfg(test)] mod tests { use super::*; struct KeywordsTestCase { keywords: &'static str, query: &'static str, expected: f64, } impl KeywordsTestCase { fn test(&self) { let actual = calc_keywords_score(&split_terms(&self.query.to_lowercase()), self.keywords); assert_eq!( actual, self.expected, "keywords: {} query: {} expected: {} actual: {actual}", self.keywords, self.query, self.expected, ); } } #[test] fn test_keywords_score() { // Keyword score 1.0 on exact matches, 0.0 otherwise KeywordsTestCase { keywords: "apple", query: "apple", expected: 1.0, } .test(); KeywordsTestCase { keywords: "apple", query: "android", expected: 0.0, } .test(); KeywordsTestCase { keywords: "apple", query: "apple phone", expected: 0.0, } .test(); // Empty keywords should always score 0.0 KeywordsTestCase { keywords: "", query: "", expected: 0.0, } .test(); KeywordsTestCase { keywords: "", query: "apple", expected: 0.0, } .test(); // Matching should be case insensitive KeywordsTestCase { keywords: "apple", query: "Apple", expected: 1.0, } .test(); } struct ProductTypeTestCase { query: &'static str, product_type: &'static str, expected: f64, } impl ProductTypeTestCase { fn test(&self) { let actual = calc_product_type_score( &split_terms(&self.query.to_lowercase()), self.product_type, ); assert_eq!( actual, self.expected, "product_type: {} query: {} expected: {} actual: {actual}", self.product_type, self.query, self.expected, ); } } #[test] fn test_product_type_score() { // Product type scores based on the percentage of terms in the product type that are also // present in the query ProductTypeTestCase { product_type: "standing desk", query: "standing desk", expected: 1.0, } .test(); ProductTypeTestCase { product_type: "standing desk", query: "desk", expected: 0.5, } .test(); ProductTypeTestCase { product_type: "standing desk", query: "desk desk desk", expected: 0.5, } .test(); ProductTypeTestCase { product_type: "standing desk", query: "standing", expected: 0.5, } .test(); ProductTypeTestCase { product_type: "standing desk", query: "phone", expected: 0.0, } .test(); // Extra terms in the query are ignored ProductTypeTestCase { product_type: "standing desk", query: "standing desk for my office", expected: 1.0, } .test(); // Empty product_type should always score 0.0 ProductTypeTestCase { product_type: "", query: "", expected: 0.0, } .test(); // Matching should be case insensitive ProductTypeTestCase { product_type: "desk", query: "Desk", expected: 1.0, } .test(); // Extra spaces are ignored ProductTypeTestCase { product_type: "desk", query: " desk ", expected: 1.0, } .test(); } }