// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license. // NOTE to all: the code in this crate sometimes slightly diverges from the // precise wording of the spec, because rust-url does not expose all the // routines exactly as the spec wants. The end behaviour should be identical. use crate::Error; // https://wicg.github.io/urlpattern/#canon-encoding-callbacks // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-protocol pub fn canonicalize_protocol(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } url::Url::parse(&format!("{value}://dummy.test")) .map(|url| url.scheme().to_owned()) .map_err(Error::Url) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-username pub fn canonicalize_username(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); // Note: unwrap is safe, because this is a HTTP url that supports username. url.set_username(value).unwrap(); Ok(url.username().to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-password pub fn canonicalize_password(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); // Note: unwrap is safe, because this is a HTTP url that supports password. url.set_password(Some(value)).unwrap(); Ok(url.password().unwrap().to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-hostname pub fn canonicalize_hostname(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); url::quirks::set_hostname(&mut url, value) .map_err(|_| Error::Url(url::ParseError::InvalidDomainCharacter))?; Ok(url::quirks::hostname(&url).to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname pub fn canonicalize_ipv6_hostname(value: &str) -> Result { let valid_ipv6 = value .chars() .all(|c| c.is_ascii_hexdigit() || matches!(c, '[' | ']' | ':')); if !valid_ipv6 { Err(Error::Url(url::ParseError::InvalidIpv6Address)) } else { Ok(value.to_ascii_lowercase()) } } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-port pub fn canonicalize_port( value: &str, mut protocol: Option<&str>, ) -> Result { if value.is_empty() { return Ok(String::new()); } if let Some("") = protocol { protocol = None; } // Note: this unwrap is safe, because the protocol was previously parsed to be // valid. let mut url = url::Url::parse(&format!("{}://dummy.test", protocol.unwrap_or("dummy"))) .unwrap(); url::quirks::set_port(&mut url, value) .map_err(|_| Error::Url(url::ParseError::InvalidPort))?; Ok(url::quirks::port(&url).to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-pathname pub fn canonicalize_pathname(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let leading_slash = value.starts_with('/'); let modified_value = if !leading_slash { format!("/-{value}") } else { value.to_string() }; let mut url = url::Url::parse("http://dummy.test").unwrap(); url.set_path(&modified_value); let mut pathname = url::quirks::pathname(&url); // If the original value didn't have a leading slash, we prepended "/-". // Only strip this prefix if it's still there after URL parsing. // If the ".." segments were resolved, the "/-" prefix may have been removed. if !leading_slash && pathname.starts_with("/-") { pathname = &pathname[2..]; } Ok(pathname.to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname pub fn canonicalize_an_opaque_pathname(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("data:dummy,test").unwrap(); url.set_path(value); Ok(url::quirks::pathname(&url).to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-search pub fn canonicalize_search(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); url.set_query(Some(value)); Ok(url.query().unwrap_or("").to_string()) } // Ref: https://wicg.github.io/urlpattern/#canonicalize-a-search pub fn canonicalize_hash(value: &str) -> Result { if value.is_empty() { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); url.set_fragment(Some(value)); Ok(url.fragment().unwrap_or("").to_string()) } #[derive(Debug, Eq, PartialEq)] pub enum ProcessType { Pattern, Url, } // Ref: https://wicg.github.io/urlpattern/#process-protocol-for-init pub fn process_protocol_init( value: &str, kind: &ProcessType, ) -> Result { let stripped_value = value.strip_suffix(':').unwrap_or(value); if kind == &ProcessType::Pattern { Ok(stripped_value.to_string()) } else { canonicalize_protocol(stripped_value) } } // Ref: https://wicg.github.io/urlpattern/#process-username-for-init pub fn process_username_init( value: &str, kind: &ProcessType, ) -> Result { if kind == &ProcessType::Pattern { Ok(value.to_string()) } else { canonicalize_username(value) } } // Ref: https://wicg.github.io/urlpattern/#process-password-for-init pub fn process_password_init( value: &str, kind: &ProcessType, ) -> Result { if kind == &ProcessType::Pattern { Ok(value.to_string()) } else { canonicalize_password(value) } } // Ref: https://wicg.github.io/urlpattern/#process-hostname-for-init pub fn process_hostname_init( value: &str, kind: &ProcessType, ) -> Result { if kind == &ProcessType::Pattern { Ok(value.to_string()) } else { canonicalize_hostname(value) } } // Ref: https://wicg.github.io/urlpattern/#process-port-for-init pub fn process_port_init( port_value: &str, protocol_value: Option<&str>, kind: &ProcessType, ) -> Result { if kind == &ProcessType::Pattern { Ok(port_value.to_string()) } else { canonicalize_port(port_value, protocol_value) } } // Ref: https://wicg.github.io/urlpattern/#process-pathname-for-init pub fn process_pathname_init( pathname_value: &str, protocol_value: Option<&str>, kind: &ProcessType, ) -> Result { if kind == &ProcessType::Pattern { Ok(pathname_value.to_string()) } else { // A path is non-opaque if: // 1. The protocol is empty, OR // 2. The protocol is a special scheme (http, https, etc.), OR // 3. The pathname has a leading '/' (indicating hierarchical path) let is_non_opaque = match protocol_value { Some(protocol) if protocol.is_empty() || is_special_scheme(protocol) => { true } _ => { // For non-special schemes, treat as non-opaque if pathname starts with '/' pathname_value.starts_with('/') } }; if is_non_opaque { canonicalize_pathname(pathname_value) } else { canonicalize_an_opaque_pathname(pathname_value) } } } // Ref: https://wicg.github.io/urlpattern/#process-search-for-init pub fn process_search_init( value: &str, kind: &ProcessType, ) -> Result { let stripped_value = if value.starts_with('?') { value.get(1..).unwrap() } else { value }; if kind == &ProcessType::Pattern { Ok(stripped_value.to_string()) } else { canonicalize_search(stripped_value) } } // Ref: https://wicg.github.io/urlpattern/#process-hash-for-init pub fn process_hash_init( value: &str, kind: &ProcessType, ) -> Result { let stripped_value = if value.starts_with('#') { value.get(1..).unwrap() } else { value }; if kind == &ProcessType::Pattern { Ok(stripped_value.to_string()) } else { canonicalize_hash(stripped_value) } } pub fn is_special_scheme(scheme: &str) -> bool { matches!(scheme, "http" | "https" | "ws" | "wss" | "ftp" | "file") } pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> { match scheme { "http" => Some("80"), "https" => Some("443"), "ws" => Some("80"), "wss" => Some("443"), "ftp" => Some("21"), "file" => None, _ => None, } } // Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string pub fn process_base_url(input: &str, kind: &ProcessType) -> String { if kind != &ProcessType::Pattern { input.to_string() } else { escape_pattern_string(input) } } // Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string pub fn escape_pattern_string(input: &str) -> String { assert!(input.is_ascii()); let mut result = String::new(); for char in input.chars() { if matches!(char, '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\') { result.push('\\'); } result.push(char); } result }