From 5e1c52f1d42bcd029d94bbcd5c3fdbe6cf1bd624 Mon Sep 17 00:00:00 2001
From: Jake Shadle
Date: Mon, 18 Jul 2022 15:32:50 +0200
Subject: [PATCH 1/2] Add support for adding dyld to the module list
---
src/mac/mach.rs | 115 +++++++++++++++++++++++++++------
src/mac/streams.rs | 2 +-
src/mac/streams/module_list.rs | 112 +++++++++++++++++++++++++++++---
src/mac/task_dumper.rs | 59 +++++++++++------
tests/mac_minidump_writer.rs | 15 +++--
tests/task_dumper.rs | 13 +++-
6 files changed, 265 insertions(+), 51 deletions(-)
diff --git a/src/mac/mach.rs b/src/mac/mach.rs
index 7c9c9bfd..fe51e089 100644
--- a/src/mac/mach.rs
+++ b/src/mac/mach.rs
@@ -301,14 +301,46 @@ pub trait ThreadInfo {
/// , the file type for the main executable image
pub const MH_EXECUTE: u32 = 0x2;
+/// usr/include/mach-o/loader.h, the file type for dyld, the dynamic loader
+pub const MH_DYLINKER: u32 = 0x7;
// usr/include/mach-o/loader.h, magic number for MachHeader
pub const MH_MAGIC_64: u32 = 0xfeedfacf;
-// usr/include/mach-o/loader.h, command to map a segment
-pub const LC_SEGMENT_64: u32 = 0x19;
-// usr/include/mach-o/loader.h, dynamically linked shared lib ident
-pub const LC_ID_DYLIB: u32 = 0xd;
-// usr/include/mach-o/loader.h, the uuid
-pub const LC_UUID: u32 = 0x1b;
+
+/// Load command constants from usr/include/mach-o/loader.h
+#[repr(u32)]
+#[derive(Debug)]
+pub enum LoadCommandKind {
+ /// Command to map a segment
+ Segment = 0x19,
+ /// Dynamically linked shared lib ident
+ IdDylib = 0xd,
+ /// Image uuid
+ Uuid = 0x1b,
+ /// Load a dynamic linker. Should only be on MH_EXECUTE (main executable)
+ /// images when the dynamic linker is overridden
+ LoadDylinker = 0xe,
+ /// Dynamic linker identification
+ IdDylinker = 0xf,
+}
+
+impl LoadCommandKind {
+ #[inline]
+ fn from_u32(kind: u32) -> Option {
+ Some(if kind == Self::Segment as u32 {
+ Self::Segment
+ } else if kind == Self::IdDylib as u32 {
+ Self::IdDylib
+ } else if kind == Self::Uuid as u32 {
+ Self::Uuid
+ } else if kind == Self::LoadDylinker as u32 {
+ Self::LoadDylinker
+ } else if kind == Self::IdDylinker as u32 {
+ Self::IdDylinker
+ } else {
+ return None;
+ })
+ }
+}
/// The header at the beginning of every (valid) Mach image
///
@@ -405,6 +437,33 @@ pub struct DylibCommand {
pub dylib: Dylib,
}
+/// A program that uses a dynamic linker contains a dylinker_command to identify
+/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker
+/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
+/// A file can have at most one of these.
+/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and
+/// contains a string for dyld to treat like an environment variable.
+#[repr(C)]
+struct DylinkerCommandRepr {
+ /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT
+ cmd: u32,
+ /// includes pathname string
+ cmd_size: u32,
+ /// Dynamic linker's path name, an offset from the load command address
+ name: u32,
+}
+
+pub struct DylinkerCommand<'buf> {
+ /// LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT
+ pub cmd: u32,
+ /// includes pathname string
+ pub cmd_size: u32,
+ /// The offset from the load command where the path was read
+ pub name_offset: u32,
+ /// Dynamic linker's path name
+ pub name: &'buf str,
+}
+
/// The uuid load command contains a single 128-bit unique random number that
/// identifies an object produced by the static link editor.
#[repr(C)]
@@ -439,6 +498,7 @@ pub enum LoadCommand<'buf> {
Segment(&'buf SegmentCommand64),
Dylib(&'buf DylibCommand),
Uuid(&'buf UuidCommand),
+ DylinkerCommand(DylinkerCommand<'buf>),
}
pub struct LoadCommandsIter<'buf> {
@@ -466,19 +526,36 @@ impl<'buf> Iterator for LoadCommandsIter<'buf> {
return None;
}
- let cmd = match header.cmd {
- LC_SEGMENT_64 => Some(LoadCommand::Segment(
- &*(self.buffer.as_ptr().cast::()),
- )),
- LC_ID_DYLIB => Some(LoadCommand::Dylib(
- &*(self.buffer.as_ptr().cast::()),
- )),
- LC_UUID => Some(LoadCommand::Uuid(
- &*(self.buffer.as_ptr().cast::()),
- )),
- // Just ignore any other load commands
- _ => None,
- };
+ let cmd = LoadCommandKind::from_u32(header.cmd).and_then(|kind| {
+ Some(match kind {
+ LoadCommandKind::Segment => LoadCommand::Segment(
+ &*(self.buffer.as_ptr().cast::()),
+ ),
+ LoadCommandKind::IdDylib => {
+ LoadCommand::Dylib(&*(self.buffer.as_ptr().cast::()))
+ }
+ LoadCommandKind::Uuid => {
+ LoadCommand::Uuid(&*(self.buffer.as_ptr().cast::()))
+ }
+ LoadCommandKind::LoadDylinker | LoadCommandKind::IdDylinker => {
+ let dcr = &*(self.buffer.as_ptr().cast::());
+
+ let nul = self.buffer[dcr.name as usize..header.cmd_size as usize]
+ .iter()
+ .position(|c| *c == 0)?;
+
+ LoadCommand::DylinkerCommand(DylinkerCommand {
+ cmd: dcr.cmd,
+ cmd_size: dcr.cmd_size,
+ name_offset: dcr.name,
+ name: std::str::from_utf8(
+ &self.buffer[dcr.name as usize..dcr.name as usize + nul],
+ )
+ .ok()?,
+ })
+ }
+ })
+ });
self.count -= 1;
self.buffer = &self.buffer[header.cmd_size as usize..];
diff --git a/src/mac/streams.rs b/src/mac/streams.rs
index 21f869b3..bec3b225 100644
--- a/src/mac/streams.rs
+++ b/src/mac/streams.rs
@@ -11,6 +11,6 @@ use super::{
errors::WriterError,
mach,
minidump_writer::MinidumpWriter,
- task_dumper::{ImageInfo, TaskDumpError, TaskDumper},
+ task_dumper::{self, ImageInfo, TaskDumpError, TaskDumper},
};
use crate::{dir_section::DumpBuf, mem_writer::*, minidump_format::*};
diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs
index 9dff73d8..f11a225c 100644
--- a/src/mac/streams/module_list.rs
+++ b/src/mac/streams/module_list.rs
@@ -63,7 +63,7 @@ impl MinidumpWriter {
buf: &mut DumpBuf,
dumper: &TaskDumper,
) -> Result, WriterError> {
- let mut images = dumper.read_images()?;
+ let (all_images_info, mut images) = dumper.read_images()?;
// Apparently MacOS will happily list the same image multiple times
// for some reason, so sort the images by load address and remove all
@@ -72,7 +72,6 @@ impl MinidumpWriter {
images.dedup();
let mut modules = Vec::with_capacity(images.len());
- let mut has_main_executable = false;
for image in images {
if let Ok(image_details) = self.read_image(image, dumper) {
@@ -85,7 +84,6 @@ impl MinidumpWriter {
// the most interesting module for human and machine inspectors
if is_main_executable {
modules.insert(0, module);
- has_main_executable = true;
} else {
modules.push(module)
};
@@ -93,9 +91,25 @@ impl MinidumpWriter {
}
}
- if !has_main_executable {
+ if !modules
+ .get(0)
+ .map(|rm| rm.version_info.signature != format::VS_FFI_SIGNATURE)
+ .unwrap_or_default()
+ {
Err(TaskDumpError::NoExecutableImage.into())
} else {
+ // Crashpad also has code for loading the dyld info from the all images
+ // array above, but AFAICT (and from crashpad's own comments) this will
+ // never actually happen. It's more robust in the face of changes from
+ // Apple, which considering their penchant for changing things often
+ // and not actually documenting anything, is fair, but if that ever
+ // happens we can just...change the code.
+ if let Ok(dyld_image) = self.read_dyld(&all_images_info, dumper) {
+ if let Ok(module) = self.write_module(dyld_image, buf) {
+ modules.push(module);
+ }
+ }
+
Ok(modules)
}
}
@@ -148,15 +162,17 @@ impl MinidumpWriter {
let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand {
name: "LC_SEGMENT_64",
- id: mach::LC_SEGMENT_64,
+ id: mach::LoadCommandKind::Segment,
})?;
let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand {
name: "LC_UUID",
- id: mach::LC_UUID,
+ id: mach::LoadCommandKind::Uuid,
})?;
let file_path = if image.file_path != 0 {
- dumper.read_string(image.file_path).unwrap_or_default()
+ dumper
+ .read_string(image.file_path, None)
+ .unwrap_or_default()
} else {
None
};
@@ -169,6 +185,75 @@ impl MinidumpWriter {
})
}
+ /// Reads the image details of the dynamic linker (dyld) from its load commands
+ fn read_dyld(
+ &self,
+ all_images: &task_dumper::AllImagesInfo,
+ dumper: &TaskDumper,
+ ) -> Result {
+ let image = ImageInfo {
+ load_address: all_images.dyld_image_load_address,
+ file_path: 0,
+ file_mod_date: 0,
+ };
+
+ let mut load_info = None;
+ let mut version = None;
+ let mut uuid = None;
+ let mut file_path = None;
+
+ {
+ let load_commands = dumper.read_load_commands(&image)?;
+
+ for lc in load_commands.iter() {
+ match lc {
+ mach::LoadCommand::Segment(seg) if load_info.is_none() => {
+ if &seg.segment_name[..7] == b"__TEXT\0" {
+ let slide = image.load_address as isize - seg.vm_addr as isize;
+
+ load_info = Some(ImageLoadInfo {
+ vm_addr: seg.vm_addr,
+ vm_size: seg.vm_size,
+ slide,
+ });
+ }
+ }
+ mach::LoadCommand::Dylib(dylib) if version.is_none() => {
+ version = Some(dylib.dylib.current_version);
+ }
+ mach::LoadCommand::Uuid(img_id) if uuid.is_none() => {
+ uuid = Some(img_id.uuid);
+ }
+ mach::LoadCommand::DylinkerCommand(dy_cmd) if file_path.is_none() => {
+ file_path = Some(dy_cmd.name.to_owned());
+ }
+ _ => {}
+ }
+
+ if load_info.is_some() && version.is_some() && uuid.is_some() && file_path.is_some()
+ {
+ break;
+ }
+ }
+ }
+
+ let load_info = load_info.ok_or(TaskDumpError::MissingLoadCommand {
+ name: "LC_SEGMENT_64",
+ id: mach::LoadCommandKind::Segment,
+ })?;
+ let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand {
+ name: "LC_UUID",
+ id: mach::LoadCommandKind::Uuid,
+ })?;
+
+ Ok(ImageDetails {
+ uuid,
+ load_info,
+ file_path,
+ version,
+ })
+ }
+
fn write_module(
&self,
image: ImageDetails,
@@ -262,7 +347,7 @@ mod test {
let mdw = MinidumpWriter::new(None, None);
let td = TaskDumper::new(mdw.task);
- let images = td.read_images().unwrap();
+ let (all_images, images) = td.read_images().unwrap();
let actual_image_count = unsafe { libc::_dyld_image_count() } as u32;
@@ -314,5 +399,16 @@ mod test {
actual_img_details.file_path.unwrap()
);
}
+
+ let dyld = mdw
+ .read_dyld(&all_images, &td)
+ .expect("failed to read dyld");
+
+ // If the user overrides the dynamic linker and runs this test it will
+ // fail, but that's kind of on you, person reading this comment wondering
+ // why the test fails. Or Apple changed the path in whatever MacOS version
+ // in which case, please file a PR!
+ assert_eq!("/usr/lib/dyld", dyld.file_path.as_deref().unwrap());
+ assert!(dyld.load_info.vm_size > 0);
}
}
diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs
index c0f01d40..013d432d 100644
--- a/src/mac/task_dumper.rs
+++ b/src/mac/task_dumper.rs
@@ -15,8 +15,11 @@ pub enum TaskDumpError {
NonUtf8String(#[from] std::string::FromUtf8Error),
#[error("unable to find the main executable image for the process")]
NoExecutableImage,
- #[error("expected load command {name}({id}) was not found for an image")]
- MissingLoadCommand { name: &'static str, id: u32 },
+ #[error("expected load command {name}({id:?}) was not found for an image")]
+ MissingLoadCommand {
+ name: &'static str,
+ id: mach::LoadCommandKind,
+ },
}
/// Wraps a mach call in a Result
@@ -45,17 +48,29 @@ macro_rules! mach_call {
/// This struct is truncated as we only need a couple of fields at the beginning
/// of the struct
#[repr(C)]
-struct AllImagesInfo {
- version: u32, // == 1 in Mac OS X 10.4
+#[derive(Copy, Clone)]
+pub struct AllImagesInfo {
+ // VERSION 1
+ pub version: u32,
/// The number of [`ImageInfo`] structs at that following address
info_array_count: u32,
/// The address in the process where the array of [`ImageInfo`] structs is
info_array_addr: u64,
+ /// A function pointer, unused
+ _notification: u64,
+ /// Unused
+ _process_detached_from_shared_region: bool,
+ // VERSION 2
+ lib_system_initialized: bool,
+ // Note that crashpad adds a 32-bit int here to get proper alignment when
+ // building on 32-bit targets...but we explicitly don't care about 32-bit
+ // targets since Apple doesn't
+ pub dyld_image_load_address: u64,
}
/// `dyld_image_info` from
#[repr(C)]
-#[derive(Clone)]
+#[derive(Debug, Clone, Copy)]
pub struct ImageInfo {
/// The address in the process where the image is loaded
pub load_address: u64,
@@ -164,15 +179,19 @@ impl TaskDumper {
/// is a specialization of [`read_task_memory`] since strings can span VM
/// regions.
///
- /// This string is capped at 8k which should never be close to being hit as
- /// it is only used for file paths for loaded modules, but then again, this
- /// is MacOS, so who knows what insanity goes on.
+ /// If not specified, the string is capped at 8k which should never be close
+ /// to being hit in normal scenarios, at least for "system" strings, which is
+ /// all this interface is used to retrieve
///
/// # Errors
///
/// Fails if the address cannot be read for some reason, or the string is
/// not utf-8.
- pub fn read_string(&self, addr: u64) -> Result