//! The adblock [`Engine`] is the primary interface for adblocking. use crate::blocker::{Blocker, BlockerResult}; use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder; use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError}; use crate::filters::cosmetic::CosmeticFilter; use crate::filters::fb_builder::EngineFlatBuilder; use crate::filters::fb_network_builder::NetworkRulesBuilder; use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef}; use crate::filters::network::NetworkFilter; use crate::flatbuffers::containers::flat_serialize::FlatSerialize; use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::lists::{FilterSet, ParseOptions}; use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend}; use std::collections::HashSet; /// Drives high-level blocking logic and is responsible for loading filter lists into an optimized /// format that can be queried efficiently. /// /// For performance optimization reasons, the [`Engine`] is not designed to have rules added or /// removed after its initial creation. Making changes to the rules loaded is accomplished by /// creating a new engine to replace it. /// /// ## Usage /// /// ### Initialization /// /// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list /// header metadata. Once all lists have been composed together, you can call /// [`Engine::from_filter_set`] to start using them for blocking. /// /// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet /// injections. These are known as [`Resource`]s, and can be provided with /// [`Engine::use_resources`]. See the [`crate::resources`] module for more information. /// /// ### Network blocking /// /// Use the [`Engine::check_network_request`] method to determine how to handle a network request. /// /// If you _only_ need network blocking, consider using a [`Blocker`] directly. /// /// ### Cosmetic filtering /// /// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a /// particular page before it starts loading. /// /// Once the page has been loaded, any new CSS classes or ids that appear on the page should be passed to /// [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional elements that /// should be hidden dynamically. pub struct Engine { blocker: Blocker, cosmetic_cache: CosmeticFilterCache, resources: ResourceStorage, filter_data_context: FilterDataContextRef, } #[cfg(feature = "debug-info")] pub struct EngineDebugInfo { pub regex_debug_info: crate::regex_manager::RegexDebugInfo, pub flatbuffer_size: usize, } impl Default for Engine { fn default() -> Self { Self::from_filter_set(FilterSet::new(false), false) } } impl Engine { /// Loads rules in a single format, enabling optimizations and discarding debug information. pub fn from_rules( rules: impl IntoIterator>, opts: ParseOptions, ) -> Self { let mut filter_set = FilterSet::new(false); filter_set.add_filters(rules, opts); Self::from_filter_set(filter_set, true) } /// Loads rules, enabling optimizations and including debug information. pub fn from_rules_debug( rules: impl IntoIterator>, opts: ParseOptions, ) -> Self { Self::from_rules_parametrised(rules, opts, true, true) } pub fn from_rules_parametrised( filter_rules: impl IntoIterator>, opts: ParseOptions, debug: bool, optimize: bool, ) -> Self { let mut filter_set = FilterSet::new(debug); filter_set.add_filters(filter_rules, opts); Self::from_filter_set(filter_set, optimize) } #[cfg(test)] pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache { self.cosmetic_cache } #[cfg(test)] pub(crate) fn filter_data_context(self) -> FilterDataContextRef { self.filter_data_context } /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding /// rules from multiple sources. pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self { let FilterSet { network_filters, cosmetic_filters, .. } = set; let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize); let filter_data_context = FilterDataContext::new(memory); Self { blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)), cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone( &filter_data_context, )), resources: ResourceStorage::default(), filter_data_context, } } /// Check if a request for a network resource from `url`, of type `request_type`, initiated by /// `source_url`, should be blocked. pub fn check_network_request(&self, request: &Request) -> BlockerResult { self.blocker.check(request, &self.resources) } #[cfg(test)] pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool { self.blocker.check_exceptions(request) } pub fn check_network_request_subset( &self, request: &Request, previously_matched_rule: bool, force_check_exceptions: bool, ) -> BlockerResult { self.blocker.check_parameterised( request, &self.resources, previously_matched_rule, force_check_exceptions, ) } /// Returns a string containing any additional CSP directives that should be added to this /// request's response. Only applies to document and subdocument requests. /// /// If multiple policies are present from different rules, they will be joined by commas. pub fn get_csp_directives(&self, request: &Request) -> Option { self.blocker.get_csp_directives(request) } /// Sets this engine's tags to be _only_ the ones provided in `tags`. /// /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` /// option. pub fn use_tags(&mut self, tags: &[&str]) { self.blocker.use_tags(tags); } /// Sets this engine's tags to additionally include the ones provided in `tags`. /// /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` /// option. pub fn enable_tags(&mut self, tags: &[&str]) { self.blocker.enable_tags(tags); } /// Sets this engine's tags to no longer include the ones provided in `tags`. /// /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` /// option. pub fn disable_tags(&mut self, tags: &[&str]) { self.blocker.disable_tags(tags); } /// Checks if a given tag exists in this engine. /// /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` /// option. pub fn tag_exists(&self, tag: &str) -> bool { self.blocker.tags_enabled().contains(&tag.to_owned()) } /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`. /// /// The resources will be held in-memory. If you have special caching, management, or sharing /// requirements, consider [Engine::use_resource_storage] instead. pub fn use_resources(&mut self, resources: impl IntoIterator) { let storage = crate::resources::InMemoryResourceStorage::from_resources(resources); self.use_resource_storage(storage); } /// Sets this engine's backend for [Resource] storage to a custom implementation of /// [ResourceStorageBackend]. /// /// If you're okay with the [Engine] holding these resources in-memory, use /// [Engine::use_resources] instead. #[cfg(not(feature = "single-thread"))] pub fn use_resource_storage( &mut self, resources: R, ) { self.resources = ResourceStorage::from_backend(resources); } /// Sets this engine's backend for [Resource] storage to a custom implementation of /// [ResourceStorageBackend]. /// /// If you're okay with the [Engine] holding these resources in-memory, use /// [Engine::use_resources] instead. #[cfg(feature = "single-thread")] pub fn use_resource_storage(&mut self, resources: R) { self.resources = ResourceStorage::from_backend(resources); } // Cosmetic filter functionality /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of /// CSS selectors corresponding to rules referencing those classes or ids, provided that the /// corresponding rules are not excepted. /// /// `exceptions` should be passed directly from `UrlSpecificResources`. pub fn hidden_class_id_selectors( &self, classes: impl IntoIterator>, ids: impl IntoIterator>, exceptions: &HashSet, ) -> Vec { self.cosmetic_cache .hidden_class_id_selectors(classes, ids, exceptions) } /// Returns a set of cosmetic filter resources required for a particular url. Once this has /// been called, all CSS ids and classes on a page should be passed to /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the /// returned `generichide` value is false). pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources { let request = if let Ok(request) = Request::new(url, url, "document") { request } else { return UrlSpecificResources::empty(); }; let generichide = self.blocker.check_generic_hide(&request); self.cosmetic_cache.hostname_cosmetic_resources( &self.resources, &request.hostname, generichide, ) } pub fn set_regex_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) { self.blocker.set_regex_discard_policy(new_discard_policy); } #[cfg(test)] pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> { self.blocker.borrow_regex_manager() } #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.blocker.discard_regex(regex_id); } #[cfg(feature = "debug-info")] pub fn get_debug_info(&self) -> EngineDebugInfo { EngineDebugInfo { regex_debug_info: self.blocker.get_regex_debug_info(), flatbuffer_size: self.filter_data_context.memory.data().len(), } } /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. pub fn serialize(&self) -> Vec { let data = self.filter_data_context.memory.data(); serialize_dat_file(data) } /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`. /// /// Note that the binary format has a built-in version number that may be incremented. There is /// no guarantee that later versions of the format will be deserializable across minor versions /// of adblock-rust; the format is provided only as a caching optimization. pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> { let current_tags = self.blocker.tags_enabled(); let data = deserialize_dat_file(serialized)?; let memory = VerifiedFlatbufferMemory::from_raw(data) .map_err(DeserializationError::FlatBufferParsingError)?; let context = FilterDataContext::new(memory); self.filter_data_context = context; self.blocker = Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context)); self.blocker .use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone( &self.filter_data_context, )); Ok(()) } } /// Static assertions for `Engine: Send + Sync` traits. #[cfg(not(feature = "single-thread"))] fn _assertions() { fn _assert_send() {} fn _assert_sync() {} _assert_send::(); _assert_sync::(); } fn make_flatbuffer( network_filters: Vec, cosmetic_filters: Vec, optimize: bool, ) -> VerifiedFlatbufferMemory { let mut builder = EngineFlatBuilder::default(); let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize); let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder); let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut builder); let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder); builder.finish(network_rules, cosmetic_rules) } #[cfg(test)] #[path = "../tests/unit/engine.rs"] mod unit_tests;