From 5e1c52f1d42bcd029d94bbcd5c3fdbe6cf1bd624 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 18 Jul 2022 15:32:50 +0200 Subject: [PATCH 1/2] Add support for adding dyld to the module list --- src/mac/mach.rs | 115 +++++++++++++++++++++++++++------ src/mac/streams.rs | 2 +- src/mac/streams/module_list.rs | 112 +++++++++++++++++++++++++++++--- src/mac/task_dumper.rs | 59 +++++++++++------ tests/mac_minidump_writer.rs | 15 +++-- tests/task_dumper.rs | 13 +++- 6 files changed, 265 insertions(+), 51 deletions(-) diff --git a/src/mac/mach.rs b/src/mac/mach.rs index 7c9c9bfd..fe51e089 100644 --- a/src/mac/mach.rs +++ b/src/mac/mach.rs @@ -301,14 +301,46 @@ pub trait ThreadInfo { /// , the file type for the main executable image pub const MH_EXECUTE: u32 = 0x2; +/// , the file type dyld, the dynamic loader +pub const MH_DYLINKER: u32 = 0x7; // usr/include/mach-o/loader.h, magic number for MachHeader pub const MH_MAGIC_64: u32 = 0xfeedfacf; -// usr/include/mach-o/loader.h, command to map a segment -pub const LC_SEGMENT_64: u32 = 0x19; -// usr/include/mach-o/loader.h, dynamically linked shared lib ident -pub const LC_ID_DYLIB: u32 = 0xd; -// usr/include/mach-o/loader.h, the uuid -pub const LC_UUID: u32 = 0x1b; + +/// Load command constants from usr/include/mach-o/loader.h +#[repr(u32)] +#[derive(Debug)] +pub enum LoadCommandKind { + /// Command to map a segment + Segment = 0x19, + /// Dynamically linked shared lib ident + IdDylib = 0xd, + /// Image uuid + Uuid = 0x1b, + /// Load a dynamic linker. Should only be on MH_EXECUTE (main executable) + /// images when the dynamic linker is overriden + LoadDylinker = 0xe, + /// Dynamic linker identification + IdDylinker = 0xf, +} + +impl LoadCommandKind { + #[inline] + fn from_u32(kind: u32) -> Option { + Some(if kind == Self::Segment as u32 { + Self::Segment + } else if kind == Self::IdDylib as u32 { + Self::IdDylib + } else if kind == Self::Uuid as u32 { + Self::Uuid + } else if kind == Self::LoadDylinker as u32 { + Self::LoadDylinker + } else if kind == Self::IdDylinker as u32 { + Self::IdDylinker + } else { + return None; + }) + } +} /// The header at the beginning of every (valid) Mach image /// @@ -405,6 +437,33 @@ pub struct DylibCommand { pub dylib: Dylib, } +/// A program that uses a dynamic linker contains a dylinker_command to identify +/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker +/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). +/// A file can have at most one of these. +/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and +/// contains string for dyld to treat like environment variable. +#[repr(C)] +struct DylinkerCommandRepr { + /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT + cmd: u32, + /// includes pathname string + cmd_size: u32, + /// Dynamic linker's path name, an offset from the load command address + name: u32, +} + +pub struct DylinkerCommand<'buf> { + /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT + pub cmd: u32, + /// includes pathname string + pub cmd_size: u32, + /// The offset from the load command where the path was read + pub name_offset: u32, + /// Dynamic linker's path name + pub name: &'buf str, +} + /// The uuid load command contains a single 128-bit unique random number that /// identifies an object produced by the static link editor. #[repr(C)] @@ -439,6 +498,7 @@ pub enum LoadCommand<'buf> { Segment(&'buf SegmentCommand64), Dylib(&'buf DylibCommand), Uuid(&'buf UuidCommand), + DylinkerCommand(DylinkerCommand<'buf>), } pub struct LoadCommandsIter<'buf> { @@ -466,19 +526,36 @@ impl<'buf> Iterator for LoadCommandsIter<'buf> { return None; } - let cmd = match header.cmd { - LC_SEGMENT_64 => Some(LoadCommand::Segment( - &*(self.buffer.as_ptr().cast::()), - )), - LC_ID_DYLIB => Some(LoadCommand::Dylib( - &*(self.buffer.as_ptr().cast::()), - )), - LC_UUID => Some(LoadCommand::Uuid( - &*(self.buffer.as_ptr().cast::()), - )), - // Just ignore any other load commands - _ => None, - }; + let cmd = LoadCommandKind::from_u32(header.cmd).and_then(|kind| { + Some(match kind { + LoadCommandKind::Segment => LoadCommand::Segment( + &*(self.buffer.as_ptr().cast::()), + ), + LoadCommandKind::IdDylib => { + LoadCommand::Dylib(&*(self.buffer.as_ptr().cast::())) + } + LoadCommandKind::Uuid => { + LoadCommand::Uuid(&*(self.buffer.as_ptr().cast::())) + } + LoadCommandKind::LoadDylinker | LoadCommandKind::IdDylinker => { + let dcr = &*(self.buffer.as_ptr().cast::()); + + let nul = self.buffer[dcr.name as usize..header.cmd_size as usize] + .iter() + .position(|c| *c == 0)?; + + LoadCommand::DylinkerCommand(DylinkerCommand { + cmd: dcr.cmd, + cmd_size: dcr.cmd_size, + name_offset: dcr.name, + name: std::str::from_utf8( + &self.buffer[dcr.name as usize..dcr.name as usize + nul], + ) + .ok()?, + }) + } + }) + }); self.count -= 1; self.buffer = &self.buffer[header.cmd_size as usize..]; diff --git a/src/mac/streams.rs b/src/mac/streams.rs index 21f869b3..bec3b225 100644 --- a/src/mac/streams.rs +++ b/src/mac/streams.rs @@ -11,6 +11,6 @@ use super::{ errors::WriterError, mach, minidump_writer::MinidumpWriter, - task_dumper::{ImageInfo, TaskDumpError, TaskDumper}, + task_dumper::{self, ImageInfo, TaskDumpError, TaskDumper}, }; use crate::{dir_section::DumpBuf, mem_writer::*, minidump_format::*}; diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index 9dff73d8..f11a225c 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -63,7 +63,7 @@ impl MinidumpWriter { buf: &mut DumpBuf, dumper: &TaskDumper, ) -> Result, WriterError> { - let mut images = dumper.read_images()?; + let (all_images_info, mut images) = dumper.read_images()?; // Apparently MacOS will happily list the same image multiple times // for some reason, so sort the images by load address and remove all @@ -72,7 +72,6 @@ impl MinidumpWriter { images.dedup(); let mut modules = Vec::with_capacity(images.len()); - let mut has_main_executable = false; for image in images { if let Ok(image_details) = self.read_image(image, dumper) { @@ -85,7 +84,6 @@ impl MinidumpWriter { // the most interesting module for human and machine inspectors if is_main_executable { modules.insert(0, module); - has_main_executable = true; } else { modules.push(module) }; @@ -93,9 +91,25 @@ impl MinidumpWriter { } } - if !has_main_executable { + if !modules + .get(0) + .map(|rm| rm.version_info.signature != format::VS_FFI_SIGNATURE) + .unwrap_or_default() + { Err(TaskDumpError::NoExecutableImage.into()) } else { + // Crashpad also has code for loading the dyld info from the all images + // array above, but AFAICT (and from crashpad's own comments) this will + // never actually happen. It's more robust in the face of changes from + // Apple, which considering their penchant for changings things often + // and not actually documenting anything, is fair, but if that ever + // happens we can just...change the code. + if let Ok(dyld_image) = self.read_dyld(&all_images_info, dumper) { + if let Ok(module) = self.write_module(dyld_image, buf) { + modules.push(module); + } + } + Ok(modules) } } @@ -148,15 +162,17 @@ impl MinidumpWriter { let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand { name: "LC_SEGMENT_64", - id: mach::LC_SEGMENT_64, + id: mach::LoadCommandKind::Segment, })?; let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { name: "LC_UUID", - id: mach::LC_UUID, + id: mach::LoadCommandKind::Uuid, })?; let file_path = if image.file_path != 0 { - dumper.read_string(image.file_path).unwrap_or_default() + dumper + .read_string(image.file_path, None) + .unwrap_or_default() } else { None }; @@ -169,6 +185,75 @@ impl MinidumpWriter { }) } + /// Reads the dynamic linker, which is similar but + fn read_dyld( + &self, + all_images: &task_dumper::AllImagesInfo, + dumper: &TaskDumper, + ) -> Result { + let image = ImageInfo { + load_address: all_images.dyld_image_load_address, + file_path: 0, + file_mod_date: 0, + }; + + let mut load_info = None; + let mut version = None; + let mut uuid = None; + let mut file_path = None; + + { + let load_commands = dumper.read_load_commands(&image)?; + + for lc in load_commands.iter() { + match lc { + mach::LoadCommand::Segment(seg) if load_info.is_none() => { + if &seg.segment_name[..7] == b"__TEXT\0" { + let slide = image.load_address as isize - seg.vm_addr as isize; + + load_info = Some(ImageLoadInfo { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + mach::LoadCommand::Dylib(dylib) if version.is_none() => { + version = Some(dylib.dylib.current_version); + } + mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { + uuid = Some(img_id.uuid); + } + mach::LoadCommand::DylinkerCommand(dy_cmd) if file_path.is_none() => { + file_path = Some(dy_cmd.name.to_owned()); + } + _ => {} + } + + if load_info.is_some() && version.is_some() && uuid.is_some() && file_path.is_some() + { + break; + } + } + } + + let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_SEGMENT_64", + id: mach::LoadCommandKind::Segment, + })?; + let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_UUID", + id: mach::LoadCommandKind::Uuid, + })?; + + Ok(ImageDetails { + uuid, + load_info, + file_path, + version, + }) + } + fn write_module( &self, image: ImageDetails, @@ -262,7 +347,7 @@ mod test { let mdw = MinidumpWriter::new(None, None); let td = TaskDumper::new(mdw.task); - let images = td.read_images().unwrap(); + let (all_images, images) = td.read_images().unwrap(); let actual_image_count = unsafe { libc::_dyld_image_count() } as u32; @@ -314,5 +399,16 @@ mod test { actual_img_details.file_path.unwrap() ); } + + let dyld = mdw + .read_dyld(&all_images, &td) + .expect("failed to read dyld"); + + // If the user overrides the dynamic linker and runs this test it will + // fail, but that's kind of on you, person reading this comment wondering + // why the test fails. Or Apple changed the path in whatever MacOS version + // in which case, please file a PR! + assert_eq!("/usr/lib/dyld", dyld.file_path.as_deref().unwrap()); + assert!(dyld.load_info.vm_size > 0); } } diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs index c0f01d40..013d432d 100644 --- a/src/mac/task_dumper.rs +++ b/src/mac/task_dumper.rs @@ -15,8 +15,11 @@ pub enum TaskDumpError { NonUtf8String(#[from] std::string::FromUtf8Error), #[error("unable to find the main executable image for the process")] NoExecutableImage, - #[error("expected load command {name}({id}) was not found for an image")] - MissingLoadCommand { name: &'static str, id: u32 }, + #[error("expected load command {name}({id:?}) was not found for an image")] + MissingLoadCommand { + name: &'static str, + id: mach::LoadCommandKind, + }, } /// Wraps a mach call in a Result @@ -45,17 +48,29 @@ macro_rules! mach_call { /// This struct is truncated as we only need a couple of fields at the beginning /// of the struct #[repr(C)] -struct AllImagesInfo { - version: u32, // == 1 in Mac OS X 10.4 +#[derive(Copy, Clone)] +pub struct AllImagesInfo { + // VERSION 1 + pub version: u32, /// The number of [`ImageInfo`] structs at that following address info_array_count: u32, /// The address in the process where the array of [`ImageInfo`] structs is info_array_addr: u64, + /// A function pointer, unused + _notification: u64, + /// Unused + _process_detached_from_shared_region: bool, + // VERSION 2 + lib_system_initialized: bool, + // Note that crashpad adds a 32-bit int here to get proper alignment when + // building on 32-bit targets...but we explicitly don't care about 32-bit + // targets since Apple doesn't + pub dyld_image_load_address: u64, } /// `dyld_image_info` from #[repr(C)] -#[derive(Clone)] +#[derive(Debug, Clone, Copy)] pub struct ImageInfo { /// The address in the process where the image is loaded pub load_address: u64, @@ -164,15 +179,19 @@ impl TaskDumper { /// is a specialization of [`read_task_memory`] since strings can span VM /// regions. /// - /// This string is capped at 8k which should never be close to being hit as - /// it is only used for file paths for loaded modules, but then again, this - /// is MacOS, so who knows what insanity goes on. + /// If not specified, the string is capped at 8k which should never be close + /// to being hit in normal scenarios, at least for "system" strings, which is + /// all this interface is used to retrieve /// /// # Errors /// /// Fails if the address cannot be read for some reason, or the string is /// not utf-8. - pub fn read_string(&self, addr: u64) -> Result, TaskDumpError> { + pub fn read_string( + &self, + addr: u64, + expected_size: Option, + ) -> Result, TaskDumpError> { // The problem is we don't know how much to read until we know how long // the string is. And we don't know how long the string is, until we've read // the memory! So, we'll try to read kMaxStringLength bytes @@ -197,8 +216,10 @@ impl TaskDumper { }; if let Ok(size_to_end) = get_region_size() { - let mut bytes = - self.read_task_memory(addr, std::cmp::min(size_to_end as usize, 8 * 1024))?; + let mut bytes = self.read_task_memory( + addr, + std::cmp::min(size_to_end as usize, expected_size.unwrap_or(8 * 1024)), + )?; // Find the null terminator and truncate our string if let Some(null_pos) = bytes.iter().position(|c| *c == 0) { @@ -318,7 +339,7 @@ impl TaskDumper { /// /// The syscall to retrieve the location of the loaded images fails, or /// the syscall to read the loaded images from the process memory fails - pub fn read_images(&self) -> Result, TaskDumpError> { + pub fn read_images(&self) -> Result<(AllImagesInfo, Vec), TaskDumpError> { impl mach::TaskInfo for mach::task_info::task_dyld_info { const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; } @@ -336,12 +357,14 @@ impl TaskDumper { let dyld_all_info_buf = self.read_task_memory::(all_images_addr, std::mem::size_of::())?; // SAFETY: this is fine as long as the kernel isn't lying to us - let all_dyld_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) }; + let all_images_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) }; - self.read_task_memory::( - all_dyld_info.info_array_addr, - all_dyld_info.info_array_count as usize, - ) + let images = self.read_task_memory::( + all_images_info.info_array_addr, + all_images_info.info_array_count as usize, + )?; + + Ok((*all_images_info, images)) } /// Retrieves the main executable image for the task. @@ -354,7 +377,7 @@ impl TaskDumper { /// Any of the errors that apply to [`Self::read_images`] apply here, in /// addition to not being able to find the main executable image pub fn read_executable_image(&self) -> Result { - let images = self.read_images()?; + let (_, images) = self.read_images()?; for img in images { let mach_header = self.read_task_memory::(img.load_address, 1)?; diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index b0985893..5dd80d6b 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -91,10 +91,7 @@ fn dump_external_process() { assert!(matches!( crash_reason, - CrashReason::MacGeneral( - minidump_common::errors::ExceptionCodeMac::EXC_BREAKPOINT, - 100 - ) + CrashReason::MacGeneral(minidump_common::errors::ExceptionCodeMac::EXC_BREAKPOINT, _) )); let _: MinidumpModuleList = md.get_stream().expect("Couldn't find MinidumpModuleList"); @@ -207,4 +204,14 @@ fn stackwalks() { }), "unable to locate expected function" ); + + let mod_list: MinidumpModuleList = md + .minidump + .get_stream() + .expect("Couldn't find MinidumpModuleList"); + + // Ensure we found dyld + assert!(mod_list + .iter() + .any(|module| &module.name == "/usr/lib/dyld")); } diff --git a/tests/task_dumper.rs b/tests/task_dumper.rs index 6f04c396..50c3536a 100644 --- a/tests/task_dumper.rs +++ b/tests/task_dumper.rs @@ -58,7 +58,7 @@ fn iterates_load_commands() { .expect("load cmd didn't end with newline"); if matches!( &block[cmd + 4..cmd + cmd_end], - "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" + "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" | "LC_LOAD_DYLINKER" ) { expected.push_str(block); } @@ -139,6 +139,17 @@ fn iterates_load_commands() { ) .unwrap(); } + LoadCommand::DylinkerCommand(dy_cmd) => { + write!( + &mut actual, + " + cmd LC_LOAD_DYLINKER + cmdsize {} + name {} (offset {})", + dy_cmd.cmd_size, dy_cmd.name, dy_cmd.name_offset, + ) + .unwrap(); + } } } } From 195adbfc5a353b06de52eea70d6cf8ee61a1f061 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 18 Jul 2022 15:51:15 +0200 Subject: [PATCH 2/2] Fix warnings --- src/mac/streams/module_list.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index f11a225c..5bdafcc9 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -320,11 +320,15 @@ impl MinidumpWriter { } #[cfg(test)] +// The libc functions used here are all marked as deprecated, saying you +// should use the mach2 crate, however, the mach2 crate does not expose +// any of these functions so... +#[allow(deprecated)] mod test { use super::*; - /// This function isn't declared in libc nor mach2. And is also undocumented - /// by apple, I know, SHOCKING + // This function isn't declared in libc nor mach2. And is also undocumented + // by apple, I know, SHOCKING extern "C" { fn getsegmentdata( header: *const libc::mach_header, @@ -339,10 +343,6 @@ mod test { /// is why they aren't used in the actual implementation as we want to handle /// both the local and intra-process scenarios #[test] - /// The libc functions used here are all marked as deprecated, saying you - /// should use the mach2 crate, however, the mach2 crate does not expose - /// any of these functions so... - #[allow(deprecated)] fn images_match() { let mdw = MinidumpWriter::new(None, None); let td = TaskDumper::new(mdw.task);