//! Contains structures needed to describe network requests. use thiserror::Error; use crate::url_parser; use crate::utils; /// The type of resource requested from the URL endpoint. #[derive(Clone, PartialEq, Debug)] pub enum RequestType { Beacon, Csp, Document, Dtd, Fetch, Font, Image, Media, Object, Other, Ping, Script, Stylesheet, Subdocument, Websocket, Xlst, Xmlhttprequest, } /// Possible failure reasons when creating a [`Request`]. #[derive(Debug, Error, PartialEq)] pub enum RequestError { #[error("hostname parsing failed")] HostnameParseError, #[error("source hostname parsing failed")] SourceHostnameParseError, #[error("invalid Unicode provided")] UnicodeDecodingError, } impl From for RequestError { fn from(_err: idna::Errors) -> RequestError { RequestError::UnicodeDecodingError } } impl From for RequestError { fn from(_err: url::ParseError) -> RequestError { RequestError::HostnameParseError } } fn cpt_match_type(cpt: &str) -> RequestType { match cpt { "beacon" => RequestType::Ping, "csp_report" => RequestType::Csp, "document" | "main_frame" => RequestType::Document, "font" => RequestType::Font, "image" | "imageset" => RequestType::Image, "media" => RequestType::Media, "object" | "object_subrequest" => RequestType::Object, "ping" => RequestType::Ping, "script" => RequestType::Script, "stylesheet" => RequestType::Stylesheet, "sub_frame" | "subdocument" => RequestType::Subdocument, "websocket" => RequestType::Websocket, "xhr" | "xmlhttprequest" => RequestType::Xmlhttprequest, "other" => RequestType::Other, "speculative" => RequestType::Other, "web_manifest" => RequestType::Other, "xbl" => RequestType::Other, "xml_dtd" => RequestType::Other, "xslt" => RequestType::Other, _ => RequestType::Other, } } /// A network [`Request`], used as an interface for network blocking in the [`crate::Engine`]. #[derive(Clone, Debug)] pub struct Request { pub request_type: RequestType, pub is_http: bool, pub is_https: bool, pub is_supported: bool, pub is_third_party: bool, pub url: String, pub hostname: String, pub source_hostname_hashes: Option>, pub(crate) url_lower_cased: String, pub(crate) request_tokens: Vec, pub(crate) original_url: String, } impl Request { pub(crate) fn get_url(&self, case_sensitive: bool) -> &str { if case_sensitive { &self.url } else { &self.url_lower_cased } } pub fn get_tokens_for_match(&self) -> impl Iterator { // We start matching with source_hostname_hashes for optimization, // as it contains far fewer elements. self.source_hostname_hashes .as_ref() .into_iter() .flatten() .chain(self.get_tokens()) } pub fn get_tokens(&self) -> &Vec { &self.request_tokens } #[allow(clippy::too_many_arguments)] fn from_detailed_parameters( raw_type: &str, url: &str, schema: &str, hostname: &str, source_hostname: &str, third_party: bool, original_url: String, ) -> Request { let is_http: bool; let is_https: bool; let is_supported: bool; let request_type: RequestType; if schema.is_empty() { // no ':' was found is_https = true; is_http = false; is_supported = true; request_type = cpt_match_type(raw_type); } else { is_http = schema == "http"; is_https = !is_http && schema == "https"; let is_websocket = !is_http && !is_https && (schema == "ws" || schema == "wss"); is_supported = is_http || is_https || is_websocket; if is_websocket { request_type = RequestType::Websocket; } else { request_type = cpt_match_type(raw_type); } } let source_hostname_hashes = if !source_hostname.is_empty() { let mut hashes = Vec::with_capacity(4); hashes.push(utils::fast_hash(source_hostname)); for (i, c) in source_hostname.char_indices() { if c == '.' && i + 1 < source_hostname.len() { hashes.push(utils::fast_hash(&source_hostname[i + 1..])); } } Some(hashes) } else { None }; let url_lower_cased = url.to_ascii_lowercase(); Request { request_type, url: url.to_owned(), url_lower_cased: url_lower_cased.to_owned(), hostname: hostname.to_owned(), request_tokens: calculate_tokens(&url_lower_cased), source_hostname_hashes, is_third_party: third_party, is_http, is_https, is_supported, original_url, } } /// Construct a new [`Request`]. pub fn new(url: &str, source_url: &str, request_type: &str) -> Result { if let Some(parsed_url) = url_parser::parse_url(url) { if let Some(parsed_source) = url_parser::parse_url(source_url) { let source_domain = parsed_source.domain(); let third_party = source_domain != parsed_url.domain(); Ok(Request::from_detailed_parameters( request_type, &parsed_url.url, parsed_url.schema(), parsed_url.hostname(), parsed_source.hostname(), third_party, url.to_string(), )) } else { Ok(Request::from_detailed_parameters( request_type, &parsed_url.url, parsed_url.schema(), parsed_url.hostname(), "", true, url.to_string(), )) } } else { Err(RequestError::HostnameParseError) } } /// If you're building a [`Request`] in a context that already has access to parsed /// representations of the input URLs, you can use this constructor to avoid extra lookups from /// the public suffix list. Take care to pass data correctly. pub fn preparsed( url: &str, hostname: &str, source_hostname: &str, request_type: &str, third_party: bool, ) -> Request { let splitter = memchr::memchr(b':', url.as_bytes()).unwrap_or(0); let schema: &str = &url[..splitter]; Request::from_detailed_parameters( request_type, url, schema, hostname, source_hostname, third_party, url.to_string(), ) } } fn calculate_tokens(url_lower_cased: &str) -> Vec { let mut tokens = utils::TokensBuffer::default(); utils::tokenize_pooled(url_lower_cased, &mut tokens); // Add zero token as a fallback to wildcard rule bucket tokens.push(0); tokens.into_iter().collect() } #[cfg(test)] #[path = "../tests/unit/request.rs"] mod unit_tests;