//! Contains HTTP symbol retrieval specific functionality use crate::*; use reqwest::{redirect, Client, Url}; use std::io::{self, Write}; use std::path::Path; use std::str::FromStr; use std::time::Duration; use tempfile::NamedTempFile; use tracing::{debug, trace, warn}; /// A key that uniquely identifies a File associated with a module type FileKey = (ModuleKey, FileKind); /// An implementation of `SymbolSupplier` that loads Breakpad text-format /// symbols from HTTP URLs. /// /// See [`crate::breakpad_sym_lookup`] for details on how paths are searched. pub struct HttpSymbolSupplier { /// File paths that are known to be in the cache #[allow(clippy::type_complexity)] cached_file_paths: CacheMap), FileError>>, /// HTTP Client to use for fetching symbols. client: Client, /// URLs to search for symbols. urls: Vec, /// A `SimpleSymbolSupplier` to use for local symbol paths. local: SimpleSymbolSupplier, /// A path at which to cache downloaded symbols. /// /// We recommend using a subdirectory of `std::env::temp_dir()`, as this /// will be your OS's intended location for tempory files. This should /// give you free garbage collection of the cache while still allowing it /// to function between runs. cache: PathBuf, /// A path to a temporary location where downloaded symbols can be written /// before being atomically swapped into the cache. /// /// We recommend using `std::env::temp_dir()`, as this will be your OS's /// intended location for temporary files. tmp: PathBuf, } impl HttpSymbolSupplier { /// Create a new `HttpSymbolSupplier`. /// /// Symbols will be searched for in each of `local_paths` and `cache` first, /// then via HTTP at each of `urls`. If a symbol file is found via HTTP it /// will be saved under `cache`. pub fn new( urls: Vec, cache: PathBuf, tmp: PathBuf, mut local_paths: Vec, timeout: Duration, ) -> HttpSymbolSupplier { let client = Client::builder().timeout(timeout).build().unwrap(); let urls = urls .into_iter() .filter_map(|mut u| { if !u.ends_with('/') { u.push('/'); } Url::parse(&u).ok() }) .collect(); local_paths.push(cache.clone()); let local = SimpleSymbolSupplier::new(local_paths); let cached_file_paths = Default::default(); HttpSymbolSupplier { client, cached_file_paths, urls, local, cache, tmp, } } #[tracing::instrument(level = "trace", skip(self, module), fields(module = crate::basename(&module.code_file())))] pub async fn locate_file_internal( &self, module: &(dyn Module + Sync), file_kind: FileKind, ) -> Result<(PathBuf, Option), FileError> { self.cached_file_paths .cache_default(file_key(module, file_kind)) .get(|| async { // First look for the file in the cache if let Ok(path) = self.local.locate_file(module, file_kind).await { return Ok((path, None)); } // Then try to download the file // FIXME: if we try to parallelize this with `join` then if we have multiple hits // we'll end up downloading all of them at once and having them race to write into // the cache... is that ok? Maybe? Since only one will ever win the swap, and it's // unlikely to get multiple hits... this might actually be ok! if let Some(lookup) = lookup(module, file_kind) { for url in &self.urls { let fetch = fetch_lookup(&self.client, url, &lookup, &self.cache, &self.tmp).await; if let Ok((path, url)) = fetch { return Ok((path, url)); } } // If we're allowed to look for mozilla's special CAB paths, do that if cfg!(feature = "mozilla_cab_symbols") { for url in &self.urls { let fetch = fetch_cab_lookup( &self.client, url, &lookup, &self.cache, &self.tmp, ) .await; if let Ok((path, url)) = fetch { return Ok((path, url)); } } } } Err(FileError::NotFound) }) .await .as_ref() .clone() } } fn file_key(module: &(dyn Module + Sync), file_kind: FileKind) -> FileKey { (module_key(module), file_kind) } fn create_cache_file(tmp_path: &Path, final_path: &Path) -> io::Result { // Use tempfile to save things to our cache to ensure proper // atomicity of writes. We may want multiple instances of rust-minidump // to be sharing a cache, and we don't want one instance to see another // instance's partially written results. // // tempfile is designed explicitly for this purpose, and will handle all // the platform-specific details and do its best to cleanup if things // crash. // First ensure that the target directory in the cache exists let base = final_path .parent() .ok_or_else(|| io::Error::other(format!("Bad cache path: {final_path:?}")))?; fs::create_dir_all(base)?; NamedTempFile::new_in(tmp_path) } fn commit_cache_file(mut temp: NamedTempFile, final_path: &Path, url: &Url) -> io::Result<()> { // Append any extra metadata we also want to be cached as "INFO" lines, // because this is an established format that parsers will ignore the // contents of by default. // INFO URL allows us to properly report the url we retrieved a symbol file // from, even when the file is loaded from our on-disk cache. let cache_metadata = format!("INFO URL {url}\n"); temp.write_all(cache_metadata.as_bytes())?; // TODO: don't do this if final_path.exists() { fs::remove_file(final_path)?; } // If another process already wrote this entry, prefer their value to // avoid needless file system churn. temp.persist_noclobber(final_path)?; Ok(()) } /// Perform a code_file/code_identifier lookup for a specific symbol server. async fn individual_lookup_debug_info_by_code_info( base_url: &Url, lookup_path: &str, ) -> Option { let url = base_url.join(lookup_path).ok()?; debug!("Trying code file / code identifier lookup: {}", url); // This should not follow redirects--we want the next url if there is one let no_redirects_client = Client::builder() .redirect(redirect::Policy::none()) .build() .ok()?; let response = no_redirects_client.get(url.clone()).send().await; if let Ok(res) = response { let res_status = res.status(); if res_status == reqwest::StatusCode::FOUND || res_status == reqwest::StatusCode::MOVED_PERMANENTLY { let location_header = res.headers().get("Location")?; let mut new_url = location_header.to_str().ok()?; if new_url.starts_with('/') { new_url = new_url.strip_prefix('/').unwrap_or(new_url); } // new_url looks like some/path/stuff/xul.pdb/somedebugid/xul.sym and we want the debug // file and debug id portions which are at fixed indexes from the end let mut parts = new_url.rsplit('/'); let debug_identifier_part = parts.nth(1)?; let debug_identifier = DebugId::from_str(debug_identifier_part).ok()?; let debug_file_part = parts.next()?; let debug_file = String::from(debug_file_part); debug!("Found debug info {} {}", debug_file, debug_identifier); return Some(DebugInfoResult { debug_file, debug_identifier, }); } } None } /// Given a vector of symbol urls and a module with a code_file and code_identifier, /// this tries to request a symbol file using the code file and code identifier. /// /// `//.sym` /// /// If the symbol server returns an HTTP 302 redirect, the Location header will /// have the correct download API url with the debug file and debug identifier. /// /// This is supported by tecken /// /// This returns a DebugInfoResult with the new debug file and debug identifier /// or None. async fn lookup_debug_info_by_code_info( symbol_urls: &Vec, module: &(dyn Module + Sync), ) -> Option { let lookup_path = code_info_breakpad_sym_lookup(module)?; for base_url in symbol_urls { if let Some(result) = individual_lookup_debug_info_by_code_info(base_url, &lookup_path).await { return Some(result); } } debug!( "No debug file / debug id found with lookup path {}.", lookup_path ); None } /// Fetch a symbol file from the URL made by combining `base_url` and `rel_path` using `client`, /// save the file contents under `cache` + `rel_path` and also return them. async fn fetch_symbol_file( client: &Client, base_url: &Url, module: &(dyn Module + Sync), cache: &Path, tmp: &Path, ) -> Result { trace!("HttpSymbolSupplier trying symbol server {}", base_url); // This function is a bit of a complicated mess because we want to write // the input to our symbol cache, but we're a streaming parser. So we // use the bare SymbolFile::parse to get access to the contents of // the input stream as it's downloaded+parsed to write it to disk. // // Note that caching is strictly "optional" because it's more important // to parse the symbols. So if at any point the caching i/o fails, we just // give up on caching but let the parse+download continue. // First try to GET the file from a server let sym_lookup = breakpad_sym_lookup(module).ok_or(SymbolError::MissingDebugFileOrId)?; let mut url = base_url .join(&sym_lookup.server_rel) .map_err(|_| SymbolError::NotFound)?; let code_id = module.code_identifier().unwrap_or_default(); url.query_pairs_mut() .append_pair("code_file", crate::basename(&module.code_file())) .append_pair("code_id", code_id.as_str()); debug!("Trying {}", url); let res = client .get(url.clone()) .send() .await .and_then(|res| res.error_for_status()) .map_err(|_| SymbolError::NotFound)?; // Now try to create the temp cache file (not yet in the cache) let final_cache_path = cache.join(sym_lookup.cache_rel); let mut temp = create_cache_file(tmp, &final_cache_path) .map_err(|e| { warn!("Failed to save symbol file in local disk cache: {}", e); }) .ok(); // Now stream parse the file as it downloads. let mut symbol_file = SymbolFile::parse_async(res, |data| { // While we're downloading+parsing, save this data to the the disk cache too if let Some(file) = temp.as_mut() { if let Err(e) = file.write_all(data) { // Give up on caching this. warn!("Failed to save symbol file in local disk cache: {}", e); temp = None; } } }) .await?; // Make note of what URL this symbol file was downloaded from. symbol_file.url = Some(url.to_string()); // Try to finish the cache file and atomically swap it into the cache. if let Some(temp) = temp { let _ = commit_cache_file(temp, &final_cache_path, &url).map_err(|e| { warn!("Failed to save symbol file in local disk cache: {}", e); }); } Ok(symbol_file) } /// Like fetch_symbol_file but instead of parsing the file live, we just download it opaquely based /// on the given Lookup. /// /// The returned value is the path to the downloaded file and the url it was downloaded from. async fn fetch_lookup( client: &Client, base_url: &Url, lookup: &FileLookup, cache: &Path, tmp: &Path, ) -> Result<(PathBuf, Option), SymbolError> { // First try to GET the file from a server let url = base_url .join(&lookup.server_rel) .map_err(|_| SymbolError::NotFound)?; debug!("Trying {}", url); let mut res = client .get(url.clone()) .send() .await .and_then(|res| res.error_for_status()) .map_err(|_| SymbolError::NotFound)?; // Now try to create the temp cache file (not yet in the cache) let final_cache_path = cache.join(&lookup.cache_rel); let mut temp = create_cache_file(tmp, &final_cache_path)?; // Now stream the contents to our file while let Some(chunk) = res.chunk().await.map_err(std::io::Error::other)? { temp.write_all(&chunk[..])?; } // And swap it into the cache temp.persist_noclobber(&final_cache_path) .map_err(std::io::Error::other)?; trace!("symbols: fetched native binary: {}", lookup.cache_rel); Ok((final_cache_path, Some(url))) } #[cfg(feature = "mozilla_cab_symbols")] async fn fetch_cab_lookup( client: &Client, base_url: &Url, lookup: &FileLookup, cache: &Path, tmp: &Path, ) -> Result<(PathBuf, Option), FileError> { let cab_lookup = moz_lookup(lookup.clone()); // First try to GET the file from a server let url = base_url .join(&cab_lookup.server_rel) .map_err(|_| FileError::NotFound)?; debug!("Trying {}", url); let res = client .get(url.clone()) .send() .await .and_then(|res| res.error_for_status()) .map_err(|_| FileError::NotFound)?; let cab_bytes = res.bytes().await.map_err(|_| FileError::NotFound)?; let final_cache_path = unpack_cabinet_file(&cab_bytes, lookup, cache, tmp).map_err(|_| FileError::NotFound)?; trace!("symbols: fetched native binary: {}", lookup.cache_rel); Ok((final_cache_path, Some(url))) } #[cfg(not(feature = "mozilla_cab_symbols"))] async fn fetch_cab_lookup( _client: &Client, _base_url: &Url, _lookup: &FileLookup, _cache: &Path, _tmp: &Path, ) -> Result<(PathBuf, Option), FileError> { Err(FileError::NotFound) } #[cfg(feature = "mozilla_cab_symbols")] pub fn unpack_cabinet_file( buf: &[u8], lookup: &FileLookup, cache: &Path, tmp: &Path, ) -> Result { trace!("symbols: unpacking CAB file: {}", lookup.cache_rel); // try to find a file in a cabinet archive and unpack it to the destination use cab::Cabinet; use std::io::Cursor; fn get_cabinet_file( cab: &Cabinet>, file_name: &str, ) -> Result { for folder in cab.folder_entries() { for file in folder.file_entries() { let cab_file_name = file.name(); if cab_file_name.ends_with(file_name) { return Ok(cab_file_name.to_string()); } } } Err(std::io::Error::from(std::io::ErrorKind::NotFound)) } let final_cache_path = cache.join(&lookup.cache_rel); let cursor = Cursor::new(buf); let mut cab = Cabinet::new(cursor)?; let file_name = final_cache_path.file_name().unwrap().to_string_lossy(); let cab_file = get_cabinet_file(&cab, &file_name)?; let mut reader = cab.read_file(&cab_file)?; // Now try to create the temp cache file (not yet in the cache) let mut temp = create_cache_file(tmp, &final_cache_path)?; std::io::copy(&mut reader, &mut temp)?; // And swap it into the cache temp.persist_noclobber(&final_cache_path) .map_err(std::io::Error::other)?; Ok(final_cache_path) } /// Try to lookup native binaries in the cache and by querying the symbol server #[async_trait] impl SymbolSupplier for HttpSymbolSupplier { #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(file = crate::basename(&module.code_file())))] async fn locate_symbols( &self, module: &(dyn Module + Sync), ) -> Result { // If we don't have a debug_file or debug_identifier, then try to get it // from a symbol server. let mut debug_file = module.debug_file().map(|name| name.into_owned()); let mut debug_id = module.debug_identifier(); let missing_debug_info = debug_file.is_none() || debug_id.is_none(); let extra_debug_info; if missing_debug_info { debug!("Missing debug file or debug identifier--trying lookup with code info"); extra_debug_info = lookup_debug_info_by_code_info(&self.urls, module).await; if let Some(debug_info_result) = &extra_debug_info { debug_file = Some(debug_info_result.debug_file.clone()); debug_id = Some(debug_info_result.debug_identifier); } } else { extra_debug_info = None; } // Build a minimal module for lookups with the debug file and debug // identifier we need to use let lookup_module = SimpleModule::from_basic_info( debug_file, debug_id, Some(module.code_file().into_owned()), module.code_identifier(), ); // First: try local paths for sym files let local_result = self.local.locate_symbols(&lookup_module).await; if !matches!(local_result, Err(SymbolError::NotFound)) { // Everything but NotFound prevents cascading return local_result.map(|r| LocateSymbolsResult { symbols: r.symbols, extra_debug_info: r.extra_debug_info.or(extra_debug_info), }); } trace!("HttpSymbolSupplier search (SimpleSymbolSupplier found nothing)"); // Second: try to directly download sym files for url in &self.urls { // First, try to get a breakpad .sym file from the symbol server let sym = fetch_symbol_file(&self.client, url, &lookup_module, &self.cache, &self.tmp).await; match sym { Ok(symbols) => { trace!("HttpSymbolSupplier parsed file!"); return Ok(LocateSymbolsResult { symbols, extra_debug_info, }); } Err(e) => { trace!("HttpSymbolSupplier failed: {}", e); } } } // If we get this far, we have failed to find anything Err(SymbolError::NotFound) } async fn locate_file( &self, module: &(dyn Module + Sync), file_kind: FileKind, ) -> Result { self.locate_file_internal(module, file_kind) .await .map(|(path, _url)| path) } }