/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use crate::error::*; use crate::types::VisitType; use error_support::trace_error; use rusqlite::Connection; use types::Timestamp; #[derive(Debug, Clone, Copy, PartialEq)] enum RedirectBonus { Unknown, Redirect, Normal, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct FrecencySettings { // TODO: These probably should not all be i32s... pub num_visits: i32, // from "places.frecency.numVisits" pub first_bucket_cutoff_days: i32, // from "places.frecency.firstBucketCutoff" pub second_bucket_cutoff_days: i32, // from "places.frecency.secondBucketCutoff" pub third_bucket_cutoff_days: i32, // from "places.frecency.thirdBucketCutoff" pub fourth_bucket_cutoff_days: i32, // from "places.frecency.fourthBucketCutoff" pub first_bucket_weight: i32, // from "places.frecency.firstBucketWeight" pub second_bucket_weight: i32, // from "places.frecency.secondBucketWeight" pub third_bucket_weight: i32, // from "places.frecency.thirdBucketWeight" pub fourth_bucket_weight: i32, // from "places.frecency.fourthBucketWeight" pub default_bucket_weight: i32, // from "places.frecency.defaultBucketWeight" pub embed_visit_bonus: i32, // from "places.frecency.embedVisitBonus" pub framed_link_visit_bonus: i32, // from "places.frecency.framedLinkVisitBonus" pub link_visit_bonus: i32, // from "places.frecency.linkVisitBonus" pub typed_visit_bonus: i32, // from "places.frecency.typedVisitBonus" pub bookmark_visit_bonus: i32, // from "places.frecency.bookmarkVisitBonus" pub download_visit_bonus: i32, // from "places.frecency.downloadVisitBonus" pub permanent_redirect_visit_bonus: i32, // from "places.frecency.permRedirectVisitBonus" pub temporary_redirect_visit_bonus: i32, // from "places.frecency.tempRedirectVisitBonus" pub redirect_source_visit_bonus: i32, // from "places.frecency.redirectSourceVisitBonus" pub default_visit_bonus: i32, // from "places.frecency.defaultVisitBonus" pub unvisited_bookmark_bonus: i32, // from "places.frecency.unvisitedBookmarkBonus" pub unvisited_typed_bonus: i32, // from "places.frecency.unvisitedTypedBonus" pub reload_visit_bonus: i32, // from "places.frecency.reloadVisitBonus" } pub const DEFAULT_FRECENCY_SETTINGS: FrecencySettings = FrecencySettings { // These are the default values of the preferences. num_visits: 10, first_bucket_cutoff_days: 4, second_bucket_cutoff_days: 14, third_bucket_cutoff_days: 31, fourth_bucket_cutoff_days: 90, first_bucket_weight: 100, second_bucket_weight: 70, third_bucket_weight: 50, fourth_bucket_weight: 30, default_bucket_weight: 10, embed_visit_bonus: 0, framed_link_visit_bonus: 0, link_visit_bonus: 100, typed_visit_bonus: 2000, bookmark_visit_bonus: 75, download_visit_bonus: 0, permanent_redirect_visit_bonus: 0, temporary_redirect_visit_bonus: 0, redirect_source_visit_bonus: 25, default_visit_bonus: 0, unvisited_bookmark_bonus: 140, unvisited_typed_bonus: 200, reload_visit_bonus: 0, }; impl Default for FrecencySettings { #[inline] fn default() -> Self { DEFAULT_FRECENCY_SETTINGS } } impl FrecencySettings { // Note: in Places, `redirect` defaults to false. pub fn get_transition_bonus( &self, visit_type: Option, visited: bool, redirect: bool, ) -> i32 { if redirect { return self.redirect_source_visit_bonus; } match (visit_type, visited) { (Some(VisitType::Link), _) => self.link_visit_bonus, (Some(VisitType::Embed), _) => self.embed_visit_bonus, (Some(VisitType::FramedLink), _) => self.framed_link_visit_bonus, (Some(VisitType::RedirectPermanent), _) => self.temporary_redirect_visit_bonus, (Some(VisitType::RedirectTemporary), _) => self.permanent_redirect_visit_bonus, (Some(VisitType::Download), _) => self.download_visit_bonus, (Some(VisitType::Reload), _) => self.reload_visit_bonus, (Some(VisitType::Typed), true) => self.typed_visit_bonus, (Some(VisitType::Typed), false) => self.unvisited_typed_bonus, (Some(VisitType::Bookmark), true) => self.bookmark_visit_bonus, (Some(VisitType::Bookmark), false) => self.unvisited_bookmark_bonus, (Some(VisitType::UpdatePlace), _) => self.default_visit_bonus, // 0 == undefined (see bug 375777 in bugzilla for details) (None, _) => self.default_visit_bonus, } } fn get_frecency_aged_weight(&self, age_in_days: i32) -> i32 { if age_in_days <= self.first_bucket_cutoff_days { self.first_bucket_weight } else if age_in_days <= self.second_bucket_cutoff_days { self.second_bucket_weight } else if age_in_days <= self.third_bucket_cutoff_days { self.third_bucket_weight } else if age_in_days <= self.fourth_bucket_cutoff_days { self.fourth_bucket_weight } else { self.default_bucket_weight } } } struct FrecencyComputation<'db, 's> { conn: &'db Connection, settings: &'s FrecencySettings, page_id: i64, most_recent_redirect_bonus: RedirectBonus, typed: i32, visit_count: i32, foreign_count: i32, is_query: bool, } impl<'db, 's> FrecencyComputation<'db, 's> { fn new( conn: &'db Connection, settings: &'s FrecencySettings, page_id: i64, most_recent_redirect_bonus: RedirectBonus, ) -> Result { let mut stmt = conn.prepare_cached( " SELECT typed, (visit_count_local + visit_count_remote) as visit_count, foreign_count, (substr(url, 0, 7) = 'place:') as is_query FROM moz_places WHERE id = :page_id ", )?; let mut rows = stmt.query(&[(":page_id", &page_id)])?; // trace_error to track down #4856 let row = trace_error!(rows.next()?.ok_or(rusqlite::Error::QueryReturnedNoRows))?; let typed: i32 = row.get("typed")?; let visit_count: i32 = row.get("visit_count")?; let foreign_count: i32 = row.get("foreign_count")?; let is_query: bool = row.get("is_query")?; Ok(Self { conn, settings, page_id, most_recent_redirect_bonus, typed, visit_count, foreign_count, is_query, }) } fn has_bookmark(&self) -> bool { self.foreign_count > 0 } fn score_recent_visits(&self) -> Result<(usize, f32)> { // Get a sample of the last visits to the page, to calculate its weight. // In case the visit is a redirect target, calculate the frecency // as if the original page was visited. // If it's a redirect source, we may want to use a lower bonus. let get_recent_visits = format!( "SELECT IFNULL(origin.visit_type, v.visit_type) AS visit_type, target.visit_type AS target_visit_type, v.visit_date FROM moz_historyvisits v LEFT JOIN moz_historyvisits origin ON origin.id = v.from_visit AND v.visit_type IN ({redirect_permanent}, {redirect_temporary}) LEFT JOIN moz_historyvisits target ON v.id = target.from_visit AND target.visit_type IN ({redirect_permanent}, {redirect_temporary}) WHERE v.place_id = :page_id ORDER BY v.visit_date DESC LIMIT {max_visits}", redirect_permanent = VisitType::RedirectPermanent as u8, redirect_temporary = VisitType::RedirectTemporary as u8, // in practice this is constant, so caching the query is fine. // (rusqlite has a max cache size too should things change) max_visits = self.settings.num_visits, ); let mut stmt = self.conn.prepare_cached(&get_recent_visits)?; let now = Timestamp::now(); let row_iter = stmt.query_and_then( &[(":page_id", &self.page_id)], |row| -> rusqlite::Result<_> { let visit_type = row.get::<_, Option>("visit_type")?.unwrap_or(0); let target_visit_type = row.get::<_, Option>("target_visit_type")?.unwrap_or(0); let visit_date: Timestamp = row.get("visit_date")?; let age_in_days = (now.as_millis() as f64 - visit_date.as_millis() as f64) / 86_400_000.0; Ok(( VisitType::from_primitive(visit_type), VisitType::from_primitive(target_visit_type), age_in_days.round() as i32, )) }, )?; let mut num_sampled_visits = 0; let mut points_for_sampled_visits = 0.0f32; for row_result in row_iter { let (visit_type, target_visit_type, age_in_days) = row_result?; // When adding a new visit, we should haved passed-in whether we should // use the redirect bonus. We can't fetch this information from the // database, because we only store redirect targets. // For older visits we extract the value from the database. let use_redirect_bonus = if self.most_recent_redirect_bonus == RedirectBonus::Unknown || num_sampled_visits > 0 { target_visit_type == Some(VisitType::RedirectPermanent) || (target_visit_type == Some(VisitType::RedirectTemporary) && visit_type != Some(VisitType::Typed)) } else { self.most_recent_redirect_bonus == RedirectBonus::Redirect }; let mut bonus = self.settings .get_transition_bonus(visit_type, true, use_redirect_bonus); if self.has_bookmark() { bonus += self .settings .get_transition_bonus(Some(VisitType::Bookmark), true, false); } if bonus != 0 { let weight = self.settings.get_frecency_aged_weight(age_in_days) as f32; points_for_sampled_visits += weight * (bonus as f32 / 100.0) } num_sampled_visits += 1; } Ok((num_sampled_visits, points_for_sampled_visits)) } fn get_frecency_for_sample(&self, num_sampled: usize, score: f32) -> i32 { if score == 0.0f32 { // We were unable to calculate points, maybe cause all the visits in the // sample had a zero bonus. Though, we know the page has some past valid // visit, or visit_count would be zero. Thus we set the frecency to // -1, so they are still shown in autocomplete. -1 } else { // Estimate frecency using the sampled visits. // Use ceil() so that we don't round down to 0, which // would cause us to completely ignore the place during autocomplete. ((self.visit_count as f32) * score.ceil() / (num_sampled as f32)).ceil() as i32 } } fn compute_unvisited_bookmark_frecency(&self) -> i32 { // Make it so something bookmarked and typed will have a higher frecency // than something just typed or just bookmarked. let mut bonus = self .settings .get_transition_bonus(Some(VisitType::Bookmark), false, false); if self.typed != 0 { bonus += self .settings .get_transition_bonus(Some(VisitType::Typed), false, false); } // Assume "now" as our age_in_days, so use the first bucket. let score = (self.settings.first_bucket_weight as f32) * (bonus as f32 / 100.0f32); // use ceil() so that we don't round down to 0, which // would cause us to completely ignore the place during autocomplete score.ceil() as i32 } } pub fn calculate_frecency( db: &Connection, settings: &FrecencySettings, page_id: i64, is_redirect: Option, ) -> Result { assert!(page_id > 0, "calculate_frecency given invalid page_id"); let most_recent_redirect_bonus = match is_redirect { None => RedirectBonus::Unknown, Some(true) => RedirectBonus::Redirect, Some(false) => RedirectBonus::Normal, }; let fc = FrecencyComputation::new(db, settings, page_id, most_recent_redirect_bonus)?; let (num_sampled_visits, sample_score) = if fc.visit_count > 0 { fc.score_recent_visits()? } else { (0, 0.0f32) }; Ok(if num_sampled_visits > 0 { // If we sampled some visits for this page, use the calculated weight. fc.get_frecency_for_sample(num_sampled_visits, sample_score) } else if !fc.has_bookmark() || fc.is_query { // Otherwise, this page has no visits, it may be bookmarked. 0 } else { // For unvisited bookmarks, produce a non-zero frecency, so that they show // up in URL bar autocomplete. fc.compute_unvisited_bookmark_frecency() }) }