/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use anyhow::Context; use futures_executor::{block_on, ThreadPool}; use minidump::{ system_info::Cpu, Minidump, MinidumpException, MinidumpMemoryList, MinidumpMiscInfo, MinidumpModule, MinidumpModuleList, MinidumpSystemInfo, MinidumpThread, MinidumpThreadList, MinidumpUnloadedModule, MinidumpUnloadedModuleList, Module, UnifiedMemoryList, }; use minidump_unwind::{ symbols::debuginfo::DebugInfoSymbolProvider, symbols::SymbolProvider, walk_stack, CallStack, CallStackInfo, SystemInfo, }; use std::borrow::Cow; use std::fmt::Debug; use std::fs::File; use std::io::Read; use std::path::{Path, PathBuf}; use std::sync::Arc; use serde_json::{json, Value as JsonValue}; #[cfg(windows)] mod windows; /// Analyze a minidump file to augment a corresponding .extra file with stack trace information. #[derive(Debug)] pub struct MinidumpAnalyzer<'a> { all_stacks: bool, minidump: &'a Path, extras: Option<&'a Path>, } impl<'a> MinidumpAnalyzer<'a> { /// Create a new MinidumpAnalyzer for the given minidump file. pub fn new(minidump: &'a Path) -> Self { MinidumpAnalyzer { all_stacks: false, minidump, extras: None, } } /// Generate all stacks, rather than just those of the crashing thread. pub fn all_threads(mut self, value: bool) -> Self { self.all_stacks = value; self } /// Set the extras file explicitly, rather than deriving from the minidump path. pub fn extras_file(mut self, path: &'a Path) -> Self { self.extras = Some(path); self } /// Return the effective extras file to read. pub fn get_extras_file(&self) -> Cow<'a, Path> { self.extras .map(Cow::Borrowed) .unwrap_or_else(|| Cow::Owned(extra_path_from_minidump(&self.minidump))) } /// Analyze the thread(s) and put stacks in the associated .extra file. pub fn analyze(self) -> anyhow::Result<()> { let extra_file = self.get_extras_file(); let extra_file = extra_file.as_ref(); log::info!("minidump file path: {}", self.minidump.display()); log::info!("extra file path: {}", extra_file.display()); let mut extra_json = parse_extra_file(extra_file)?; self.analyze_json(&mut extra_json)?; std::fs::write(extra_file, extra_json.to_string()) .context("while writing modified extra file")?; Ok(()) } /// Analyze the thread(s) and alter the given extra JSON data. pub fn analyze_json(self, extra_json: &mut JsonValue) -> anyhow::Result<()> { let minidump = Minidump::read_path(&self.minidump).context("while reading minidump")?; let proc = processor::Processor::new(&minidump)?; let (crashing_thread_idx, call_stacks) = proc.get_call_stacks(self.all_stacks)?; let crash_type = proc.crash_reason(); let crash_address = proc.crash_address(); let used_modules = get_used_modules(&call_stacks, proc.main_module()); // Set the `StackTraces` field in the extra JSON data. extra_json["StackTraces"] = json!({ "status": call_stack_status(&call_stacks), "crash_info": { "type": crash_type, "address": format!("{crash_address:#x}"), "crashing_thread": crashing_thread_idx // TODO: "assertion" when there's no crash indicator }, "main_module": proc.main_module().and_then(|m| module_index(&used_modules, m)), "modules": used_modules.iter().map(|module| { let code_file = module.code_file(); let code_file_path: &std::path::Path = code_file.as_ref().as_ref(); fn file_name_str(p: &std::path::Path) -> Option> { p.file_name().map(|s| s.to_string_lossy()) } json!({ "base_addr": format!("{:#x}", module.base_address()), "end_addr": format!("{:#x}", module.base_address() + module.size()), "filename": file_name_str(code_file_path), "code_id": module.code_identifier().as_ref().map(|id| id.as_str()), // `debug_file` may be a file path with additional components; we just want the // final component. See bug 1931237. "debug_file": module.debug_file().as_deref().and_then(|s| file_name_str(Path::new(s))), "debug_id": module.debug_identifier().map(|debug| debug.breakpad().to_string()), "version": module.version().as_deref() }) }).collect::>(), "unloaded_modules": proc.unloaded_modules().map(|module| { let code_file = module.code_file(); let code_file_path: &std::path::Path = code_file.as_ref().as_ref(); json!({ "base_addr": format!("{:#x}", module.base_address()), "end_addr": format!("{:#x}", module.base_address() + module.size()), "filename": code_file_path.file_name().map(|s| s.to_string_lossy()), "code_id": module.code_identifier().as_ref().map(|id| id.as_str()), }) }).collect::>(), "threads": call_stacks.iter().map(|call_stack| call_stack_to_json(call_stack, &used_modules)).collect::>() }); // StackTraces should not have null values (upstream processing expects the values to be // omitted). remove_nulls(&mut extra_json["StackTraces"]); let module_signature_info = proc.module_signature_info(); if !module_signature_info.is_null() { // ModuleSignatureInfo is sent as a crash annotation so must be string. This differs from // StackTraces which isn't actually sent (it's just read and removed by the crash // reporter client). extra_json["ModuleSignatureInfo"] = serde_json::to_string(&module_signature_info) .unwrap() .into(); } Ok(()) } } fn extra_path_from_minidump(minidump: &Path) -> PathBuf { let mut ret = minidump.to_owned(); ret.set_extension("extra"); ret } /// Parse the extra file (JSON). fn parse_extra_file(path: &Path) -> anyhow::Result { let mut extra_file_content = String::new(); File::open(path) .context("while opening extra file")? .read_to_string(&mut extra_file_content) .context("while reading extra file")?; serde_json::from_str(&extra_file_content).context("while parsing extra file JSON") } /// Get a list of modules used in call stacks, ordered by base address. fn get_used_modules<'a>( call_stacks: impl IntoIterator, main_module: Option<&'a MinidumpModule>, ) -> Vec<&'a MinidumpModule> { let mut v = call_stacks .into_iter() .flat_map(|call_stack| call_stack.frames.iter()) .filter_map(|frame| frame.module.as_ref()) // Always include the main module. .chain(main_module) .collect::>(); v.sort_by_key(|m| m.base_address()); v.dedup_by_key(|m| m.base_address()); v } mod processor { use super::*; pub struct Processor<'a> { runtime: ThreadPool, // We create a Context abstraction to easily spawn tokio tasks (which must be 'static). context: Arc>, } struct Ctx<'a> { module_list: MinidumpModuleList, unloaded_module_list: MinidumpUnloadedModuleList, memory_list: UnifiedMemoryList<'a>, thread_list: MinidumpThreadList<'a>, system_info: MinidumpSystemInfo, processor_system_info: SystemInfo, exception: MinidumpException<'a>, misc_info: Option, symbol_provider: BoxedSymbolProvider, } /// Concurrently execute the given futures, returning a Vec of the results. async fn concurrently<'a, I, Fut, R>(runtime: &R, iter: I) -> Vec where R: futures_util::task::Spawn, I: IntoIterator, Fut: std::future::Future + Send + 'a, // It's possible, though very obtuse, to support `'a` on the Output. We don't need it // though, so we keep it `'static` to simplify things. Fut::Output: Send + 'static, { use futures_util::{ future::{join_all, BoxFuture, FutureExt}, task::SpawnExt, }; join_all(iter.into_iter().map(|f| { let fut: BoxFuture<'a, Fut::Output> = f.boxed(); // Safety: It is safe to transmute to a static lifetime because we await the output of // the future while the `'a` lifetime is guaranteed to be valid (before exit from this // function). let fut: BoxFuture<'static, Fut::Output> = unsafe { std::mem::transmute(fut) }; runtime.spawn_with_handle(fut).expect("spawn failed") })) .await } impl<'a> Processor<'a> { pub fn new(minidump: &'a Minidump) -> anyhow::Result where T: std::ops::Deref, { let system_info = minidump.get_stream::()?; let misc_info = minidump.get_stream::().ok(); let module_list = minidump .get_stream::() .unwrap_or_default(); let unloaded_module_list = minidump .get_stream::() .unwrap_or_default(); let memory_list = minidump .get_stream::() .unwrap_or_default(); let thread_list = minidump.get_stream::()?; let exception = minidump.get_stream::()?; // TODO Something like SystemInfo::current() to get the active system's info? let processor_system_info = SystemInfo { os: system_info.os, os_version: None, os_build: None, cpu: system_info.cpu, cpu_info: None, cpu_microcode_version: None, cpu_count: 1, }; let symbol_provider = BoxedSymbolProvider(match system_info.cpu { // DebugInfoSymbolProvider only supports x86_64 and Arm64 right now Cpu::X86_64 | Cpu::Arm64 => Box::new(block_on(DebugInfoSymbolProvider::new( &system_info, &module_list, ))), _ => Box::new(breakpad_symbols::Symbolizer::new( breakpad_symbols::SimpleSymbolSupplier::new(vec![]), )), }); Ok(Processor { runtime: ThreadPool::new()?, context: Arc::new(Ctx { module_list, unloaded_module_list, memory_list: UnifiedMemoryList::Memory(memory_list), thread_list, system_info, processor_system_info, exception, misc_info, symbol_provider, }), }) } /// Get the minidump system info. pub fn system_info(&self) -> &MinidumpSystemInfo { &self.context.system_info } /// Get the crash reason string. pub fn crash_reason(&self) -> String { self.context .exception .get_crash_reason(self.system_info().os, self.system_info().cpu) .to_string() } /// Get the crash address. pub fn crash_address(&self) -> u64 { self.context .exception .get_crash_address(self.system_info().os, self.system_info().cpu) } /// Get call stacks for the given threads. /// /// Call stacks will be concurrently calculated. fn thread_call_stacks<'b>( &self, threads: impl IntoIterator>, ) -> anyhow::Result> { Ok(block_on(concurrently( &self.runtime, threads .into_iter() .map(|thread| self.context.thread_call_stack(thread)), )) .into_iter() .collect()) } /// Get the call stacks to process. /// /// The first value in the returned tuple is the crashing thread index. The second value is the /// set of call stacks to process. pub fn get_call_stacks(&self, all_stacks: bool) -> anyhow::Result<(usize, Vec)> { // Derive additional arguments used in stack walking. let crashing_thread = self .context .thread_list .get_thread(self.context.exception.get_crashing_thread_id()) .ok_or(anyhow::anyhow!( "exception thread id missing in thread list" ))?; Ok(if all_stacks { ( self.context .thread_list .threads .iter() .position(|t| t.raw.thread_id == crashing_thread.raw.thread_id) .expect("get_thread() returned a thread that doesn't exist"), self.thread_call_stacks(&self.context.thread_list.threads)?, ) } else { (0, self.thread_call_stacks([crashing_thread])?) }) } /// Get all modules, ordered by address. #[cfg(windows)] pub fn ordered_modules(&self) -> impl Iterator { self.context.module_list.by_addr() } /// Get all unloaded modules, ordered by address. pub fn unloaded_modules(&self) -> impl Iterator { self.context.unloaded_module_list.by_addr() } /// Get the index of the main module. /// /// Returns `None` when no main module exists (only when there are modules). pub fn main_module(&self) -> Option<&MinidumpModule> { self.context.module_list.main_module() } /// Get the json representation of module signature information. #[cfg(windows)] pub fn module_signature_info(&self) -> JsonValue { // JSON with structure { : [...], ... } let mut ret = json!({}); for module in self .ordered_modules() .map(|m| m as &dyn Module) .chain(self.unloaded_modules().map(|m| m as &dyn Module)) { let code_file = module.code_file(); let code_file_path: &std::path::Path = code_file.as_ref().as_ref(); if let Some(org_name) = windows::binary_org_name(code_file_path) { let entry = &mut ret[org_name]; if entry.is_null() { *entry = json!([]); } entry.as_array_mut().unwrap().push( code_file_path .file_name() .map(|s| s.to_string_lossy()) .into(), ); } else { log::warn!("couldn't get binary org name for {code_file}"); } } ret } /// Get the json representation of module signature information. /// /// This is currently unimplemented and returns null. #[cfg(unix)] pub fn module_signature_info(&self) -> JsonValue { JsonValue::Null } } impl Ctx<'_> { /// Compute the call stack for a single thread. pub async fn thread_call_stack(&self, thread: &MinidumpThread<'_>) -> CallStack { let context = if thread.raw.thread_id == self.exception.get_crashing_thread_id() { self.exception .context(&self.system_info, self.misc_info.as_ref()) } else { thread.context(&self.system_info, self.misc_info.as_ref()) } .map(|c| c.into_owned()); let mut stack_memory = thread.stack_memory(&self.memory_list); let Some(mut call_stack) = context.map(CallStack::with_context) else { return CallStack::with_info(thread.raw.thread_id, CallStackInfo::MissingContext); }; // Always choose the memory region that is referenced by the context, // as the exception context may refer to a different memory region than // the thread context, which in turn would fail to stack walk. if let Some(stack_ptr) = call_stack .frames .first() .map(|frame| frame.context.get_stack_pointer()) { let contains_stack_ptr = stack_memory .as_ref() .and_then(|memory| memory.get_memory_at_address::(stack_ptr)) .is_some(); if !contains_stack_ptr { stack_memory = self .memory_list .memory_at_address(stack_ptr) .or(stack_memory); } } walk_stack( 0, (), &mut call_stack, stack_memory, &self.module_list, &self.processor_system_info, &self.symbol_provider, ) .await; call_stack } } } struct BoxedSymbolProvider(Box); #[async_trait::async_trait] impl SymbolProvider for BoxedSymbolProvider { async fn fill_symbol( &self, module: &(dyn Module + Sync), frame: &mut (dyn minidump_unwind::FrameSymbolizer + Send), ) -> Result<(), minidump_unwind::FillSymbolError> { // Initialize the function name to a dummy value to allow stack scanning to work when the // DebugInfoSymbolProvider is used. It will be overwritten by `fill_symbol` if necessary, // however either way we don't care about symbols at all here. This works around // `minidump-unwind`'s behavior of only doing stack scanning if fill_symbol (1) fails or // (2) succeeds and sets a function name. By always setting a function name, we can ensure // that unwinding will always attempt to scan the stack if necessary. // // TODO: Remove this workaround once minidump-unwind is updated to a version which includes // https://github.com/rust-minidump/rust-minidump/pull/1117. frame.set_function("", 0, 0); self.0.fill_symbol(module, frame).await } async fn walk_frame( &self, module: &(dyn Module + Sync), walker: &mut (dyn minidump_unwind::FrameWalker + Send), ) -> Option<()> { self.0.walk_frame(module, walker).await } async fn get_file_path( &self, module: &(dyn Module + Sync), file_kind: minidump_unwind::FileKind, ) -> Result { self.0.get_file_path(module, file_kind).await } fn stats(&self) -> std::collections::HashMap { self.0.stats() } fn pending_stats(&self) -> minidump_unwind::PendingSymbolStats { self.0.pending_stats() } } /// Get the index of `needle` in `modules`. fn module_index(modules: &[&MinidumpModule], needle: &MinidumpModule) -> Option { modules .iter() .position(|o| o.base_address() == needle.base_address()) } /// Convert a call stack to json (in a form appropriate for the extra json file). fn call_stack_to_json(call_stack: &CallStack, modules: &[&MinidumpModule]) -> JsonValue { json!({ "frames": call_stack.frames.iter().map(|frame| { json!({ "ip": format!("{:#x}", frame.instruction), "module_index": frame.module.as_ref().and_then(|m| module_index(modules, m)), "trust": frame.trust.as_str(), }) }).collect::>() }) } fn call_stack_status(stacks: &[CallStack]) -> JsonValue { let mut error_string = String::new(); for (_i, s) in stacks.iter().enumerate() { match s.info { CallStackInfo::Ok | CallStackInfo::DumpThreadSkipped => (), CallStackInfo::UnsupportedCpu => { // If the CPU is unsupported, it ought to be the same error for every thread. error_string = "unsupported cpu".into(); break; } // We ignore these errors as they are permissible wrt the overall status. CallStackInfo::MissingContext | CallStackInfo::MissingMemory => (), } } if error_string.is_empty() { "OK".into() } else { error_string.into() } } /// Remove all object entries which have null values. fn remove_nulls(value: &mut JsonValue) { match value { JsonValue::Array(vals) => { for v in vals { remove_nulls(v); } } JsonValue::Object(kvs) => { kvs.retain(|_, v| !v.is_null()); for v in kvs.values_mut() { remove_nulls(v); } } _ => (), } }