From b4c50cbfa8705977100833156a46911e950fc868 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 8 Apr 2022 16:09:36 +0200 Subject: [PATCH 01/53] Move memory write to be a shared module --- src/lib.rs | 5 + src/linux/dso_debug.rs | 8 +- src/linux/errors.rs | 11 +- src/linux/minidump_writer.rs | 3 +- src/linux/sections.rs | 263 +-------------------------------- src/mem_writer.rs | 271 +++++++++++++++++++++++++++++++++++ 6 files changed, 283 insertions(+), 278 deletions(-) create mode 100644 src/mem_writer.rs diff --git a/src/lib.rs b/src/lib.rs index ab803757..da5ef8ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,3 +12,8 @@ cfg_if::cfg_if! { pub mod minidump_cpu; pub mod minidump_format; + +/// Non-windows platforms need additional code since they are essentially +/// replicating functionality we get for free on Windows +#[cfg(not(target_os = "windows"))] +pub(crate) mod mem_writer; diff --git a/src/linux/dso_debug.rs b/src/linux/dso_debug.rs index eb3cc396..7aa60adb 100644 --- a/src/linux/dso_debug.rs +++ b/src/linux/dso_debug.rs @@ -1,10 +1,6 @@ use crate::{ - linux::{ - auxv_reader::AuxvType, - errors::SectionDsoDebugError, - ptrace_dumper::PtraceDumper, - sections::{write_string_to_location, Buffer, MemoryArrayWriter, MemoryWriter}, - }, + linux::{auxv_reader::AuxvType, errors::SectionDsoDebugError, ptrace_dumper::PtraceDumper}, + mem_writer::{write_string_to_location, Buffer, MemoryArrayWriter, MemoryWriter}, minidump_format::*, }; use std::collections::HashMap; diff --git a/src/linux/errors.rs b/src/linux/errors.rs index ca9d83f4..7caba1ce 100644 --- a/src/linux/errors.rs +++ b/src/linux/errors.rs @@ -1,4 +1,5 @@ use crate::maps_reader::MappingInfo; +use crate::mem_writer::MemoryWriterError; use crate::thread_info::Pid; use goblin; use thiserror::Error; @@ -120,16 +121,6 @@ pub enum DumperError { MapsReaderError(#[from] MapsReaderError), } -#[derive(Debug, Error)] -pub enum MemoryWriterError { - #[error("IO error when writing to DumpBuf")] - IOError(#[from] std::io::Error), - #[error("Failed integer conversion")] - TryFromIntError(#[from] std::num::TryFromIntError), - #[error("Failed to write to buffer")] - Scroll(#[from] scroll::Error), -} - #[derive(Debug, Error)] pub enum SectionAppMemoryError { #[error("Failed to copy memory from process")] diff --git a/src/linux/minidump_writer.rs b/src/linux/minidump_writer.rs index 069308a5..1510f198 100644 --- a/src/linux/minidump_writer.rs +++ b/src/linux/minidump_writer.rs @@ -3,12 +3,13 @@ use crate::{ app_memory::AppMemoryList, crash_context::CrashContext, dso_debug, - errors::{FileWriterError, InitError, MemoryWriterError, WriterError}, + errors::{FileWriterError, InitError, WriterError}, maps_reader::{MappingInfo, MappingList}, ptrace_dumper::PtraceDumper, sections::*, thread_info::Pid, }, + mem_writer::{Buffer, MemoryArrayWriter, MemoryWriter, MemoryWriterError}, minidump_format::*, }; use std::io::{Seek, SeekFrom, Write}; diff --git a/src/linux/sections.rs b/src/linux/sections.rs index b7850ac0..d898ac5e 100644 --- a/src/linux/sections.rs +++ b/src/linux/sections.rs @@ -7,270 +7,11 @@ pub mod thread_list_stream; pub mod thread_names_stream; use crate::{ - errors::{self, MemoryWriterError}, + errors::{self}, linux::{ minidump_writer::{self, DumpBuf, MinidumpWriter}, ptrace_dumper::PtraceDumper, }, + mem_writer::*, minidump_format::*, }; -use scroll::ctx::{SizeWith, TryIntoCtx}; - -type WriteResult = std::result::Result; - -macro_rules! size { - ($t:ty) => { - <$t>::size_with(&scroll::Endian::Little) - }; -} - -pub struct Buffer { - inner: Vec, -} - -impl Buffer { - pub fn with_capacity(cap: usize) -> Self { - Self { - inner: Vec::with_capacity(cap), - } - } - - #[inline] - pub fn position(&self) -> u64 { - self.inner.len() as u64 - } - - #[inline] - #[must_use] - fn reserve(&mut self, len: usize) -> usize { - let mark = self.inner.len(); - self.inner.resize(self.inner.len() + len, 0); - mark - } - - #[inline] - fn write(&mut self, val: N) -> Result - where - N: TryIntoCtx + SizeWith, - E: From, - { - self.write_at(self.inner.len(), val) - } - - fn write_at(&mut self, offset: usize, val: N) -> Result - where - N: TryIntoCtx + SizeWith, - E: From, - { - let to_write = size!(N); - let remainder = self.inner.len() - offset; - if remainder < to_write { - self.inner - .resize(self.inner.len() + to_write - remainder, 0); - } - - let dst = &mut self.inner[offset..offset + to_write]; - val.try_into_ctx(dst, scroll::Endian::Little) - } - - #[inline] - pub fn write_all(&mut self, buffer: &[u8]) { - self.inner.extend_from_slice(buffer); - } -} - -impl From for Vec { - fn from(b: Buffer) -> Self { - b.inner - } -} - -impl std::ops::Deref for Buffer { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -#[derive(Debug, PartialEq)] -pub struct MemoryWriter { - pub position: MDRVA, - pub size: usize, - phantom: std::marker::PhantomData, -} - -impl MemoryWriter -where - T: TryIntoCtx + SizeWith, -{ - /// Create a slot for a type T in the buffer, we can fill right now with real values. - pub fn alloc_with_val(buffer: &mut Buffer, val: T) -> WriteResult { - // Mark the position as we may overwrite later - let position = buffer.position(); - let size = buffer.write(val)?; - - Ok(Self { - position: position as u32, - size, - phantom: std::marker::PhantomData, - }) - } - - /// Create a slot for a type T in the buffer, we can fill later with real values. - pub fn alloc(buffer: &mut Buffer) -> WriteResult { - let size = size!(T); - let position = buffer.reserve(size) as u32; - - Ok(Self { - position: position as u32, - size, - phantom: std::marker::PhantomData, - }) - } - - /// Write actual values in the buffer-slot we got during `alloc()` - #[inline] - pub fn set_value(&mut self, buffer: &mut Buffer, val: T) -> WriteResult<()> { - Ok(buffer.write_at(self.position as usize, val).map(|_sz| ())?) - } - - #[inline] - pub fn location(&self) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: size!(T) as u32, - rva: self.position, - } - } -} - -#[derive(Debug, PartialEq)] -pub struct MemoryArrayWriter { - pub position: MDRVA, - array_size: usize, - phantom: std::marker::PhantomData, -} - -impl MemoryArrayWriter { - #[inline] - pub fn write_bytes(buffer: &mut Buffer, slice: &[u8]) -> Self { - let position = buffer.position(); - buffer.write_all(slice); - - Self { - position: position as u32, - array_size: slice.len(), - phantom: std::marker::PhantomData, - } - } -} - -impl MemoryArrayWriter -where - T: TryIntoCtx + SizeWith + Copy, -{ - pub fn alloc_from_array(buffer: &mut Buffer, array: &[T]) -> WriteResult { - let array_size = array.len(); - let position = buffer.reserve(array_size * size!(T)); - - for (idx, val) in array.iter().enumerate() { - buffer.write_at(position + idx * size!(T), *val)?; - } - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } -} - -impl MemoryArrayWriter -where - T: TryIntoCtx + SizeWith, -{ - /// Create a slot for a type T in the buffer, we can fill in the values in one go. - pub fn alloc_from_iter( - buffer: &mut Buffer, - iter: impl IntoIterator, - ) -> WriteResult - where - I: std::iter::ExactSizeIterator, - { - let iter = iter.into_iter(); - let array_size = iter.len(); - let size = size!(T); - let position = buffer.reserve(array_size * size); - - for (idx, val) in iter.enumerate() { - buffer.write_at(position + idx * size, val)?; - } - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } - - /// Create a slot for a type T in the buffer, we can fill later with real values. - /// This function fills it with `Default::default()`, which is less performant than - /// using uninitialized memory, but safe. - pub fn alloc_array(buffer: &mut Buffer, array_size: usize) -> WriteResult { - let position = buffer.reserve(array_size * size!(T)); - - Ok(Self { - position: position as u32, - array_size, - phantom: std::marker::PhantomData, - }) - } - - /// Write actual values in the buffer-slot we got during `alloc()` - #[inline] - pub fn set_value_at(&mut self, buffer: &mut Buffer, val: T, index: usize) -> WriteResult<()> { - Ok(buffer - .write_at(self.position as usize + size!(T) * index, val) - .map(|_sz| ())?) - } - - #[inline] - pub fn location(&self) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: (self.array_size * size!(T)) as u32, - rva: self.position, - } - } - - #[inline] - pub fn location_of_index(&self, idx: usize) -> MDLocationDescriptor { - MDLocationDescriptor { - data_size: size!(T) as u32, - rva: self.position + (size!(T) * idx) as u32, - } - } -} - -pub fn write_string_to_location( - buffer: &mut Buffer, - text: &str, -) -> WriteResult { - let letters: Vec = text.encode_utf16().collect(); - - // First write size of the string (x letters in u16, times the size of u16) - let text_header = MemoryWriter::::alloc_with_val( - buffer, - (letters.len() * std::mem::size_of::()).try_into()?, - )?; - - // Then write utf-16 letters after that - let mut text_section = MemoryArrayWriter::::alloc_array(buffer, letters.len())?; - for (index, letter) in letters.iter().enumerate() { - text_section.set_value_at(buffer, *letter, index)?; - } - - let mut location = text_header.location(); - location.data_size += text_section.location().data_size; - - Ok(location) -} diff --git a/src/mem_writer.rs b/src/mem_writer.rs new file mode 100644 index 00000000..5b5b6298 --- /dev/null +++ b/src/mem_writer.rs @@ -0,0 +1,271 @@ +use crate::minidump_format::{MDLocationDescriptor, MDRVA}; +use scroll::ctx::{SizeWith, TryIntoCtx}; + +#[derive(Debug, thiserror::Error)] +pub enum MemoryWriterError { + #[error("IO error when writing to DumpBuf")] + IOError(#[from] std::io::Error), + #[error("Failed integer conversion")] + TryFromIntError(#[from] std::num::TryFromIntError), + #[error("Failed to write to buffer")] + Scroll(#[from] scroll::Error), +} + +type WriteResult = std::result::Result; + +macro_rules! size { + ($t:ty) => { + <$t>::size_with(&scroll::Endian::Little) + }; +} + +pub struct Buffer { + inner: Vec, +} + +impl Buffer { + pub fn with_capacity(cap: usize) -> Self { + Self { + inner: Vec::with_capacity(cap), + } + } + + #[inline] + pub fn position(&self) -> u64 { + self.inner.len() as u64 + } + + #[inline] + #[must_use] + fn reserve(&mut self, len: usize) -> usize { + let mark = self.inner.len(); + self.inner.resize(self.inner.len() + len, 0); + mark + } + + #[inline] + fn write(&mut self, val: N) -> Result + where + N: TryIntoCtx + SizeWith, + E: From, + { + self.write_at(self.inner.len(), val) + } + + fn write_at(&mut self, offset: usize, val: N) -> Result + where + N: TryIntoCtx + SizeWith, + E: From, + { + let to_write = size!(N); + let remainder = self.inner.len() - offset; + if remainder < to_write { + self.inner + .resize(self.inner.len() + to_write - remainder, 0); + } + + let dst = &mut self.inner[offset..offset + to_write]; + val.try_into_ctx(dst, scroll::Endian::Little) + } + + #[inline] + pub fn write_all(&mut self, buffer: &[u8]) { + self.inner.extend_from_slice(buffer); + } +} + +impl From for Vec { + fn from(b: Buffer) -> Self { + b.inner + } +} + +impl std::ops::Deref for Buffer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +#[derive(Debug, PartialEq)] +pub struct MemoryWriter { + pub position: MDRVA, + pub size: usize, + phantom: std::marker::PhantomData, +} + +impl MemoryWriter +where + T: TryIntoCtx + SizeWith, +{ + /// Create a slot for a type T in the buffer, we can fill right now with real values. + pub fn alloc_with_val(buffer: &mut Buffer, val: T) -> WriteResult { + // Mark the position as we may overwrite later + let position = buffer.position(); + let size = buffer.write(val)?; + + Ok(Self { + position: position as u32, + size, + phantom: std::marker::PhantomData, + }) + } + + /// Create a slot for a type T in the buffer, we can fill later with real values. + pub fn alloc(buffer: &mut Buffer) -> WriteResult { + let size = size!(T); + let position = buffer.reserve(size) as u32; + + Ok(Self { + position: position as u32, + size, + phantom: std::marker::PhantomData, + }) + } + + /// Write actual values in the buffer-slot we got during `alloc()` + #[inline] + pub fn set_value(&mut self, buffer: &mut Buffer, val: T) -> WriteResult<()> { + Ok(buffer.write_at(self.position as usize, val).map(|_sz| ())?) + } + + #[inline] + pub fn location(&self) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: size!(T) as u32, + rva: self.position, + } + } +} + +#[derive(Debug, PartialEq)] +pub struct MemoryArrayWriter { + pub position: MDRVA, + array_size: usize, + phantom: std::marker::PhantomData, +} + +impl MemoryArrayWriter { + #[inline] + pub fn write_bytes(buffer: &mut Buffer, slice: &[u8]) -> Self { + let position = buffer.position(); + buffer.write_all(slice); + + Self { + position: position as u32, + array_size: slice.len(), + phantom: std::marker::PhantomData, + } + } +} + +impl MemoryArrayWriter +where + T: TryIntoCtx + SizeWith + Copy, +{ + pub fn alloc_from_array(buffer: &mut Buffer, array: &[T]) -> WriteResult { + let array_size = array.len(); + let position = buffer.reserve(array_size * size!(T)); + + for (idx, val) in array.iter().enumerate() { + buffer.write_at(position + idx * size!(T), *val)?; + } + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } +} + +impl MemoryArrayWriter +where + T: TryIntoCtx + SizeWith, +{ + /// Create a slot for a type T in the buffer, we can fill in the values in one go. + pub fn alloc_from_iter( + buffer: &mut Buffer, + iter: impl IntoIterator, + ) -> WriteResult + where + I: std::iter::ExactSizeIterator, + { + let iter = iter.into_iter(); + let array_size = iter.len(); + let size = size!(T); + let position = buffer.reserve(array_size * size); + + for (idx, val) in iter.enumerate() { + buffer.write_at(position + idx * size, val)?; + } + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } + + /// Create a slot for a type T in the buffer, we can fill later with real values. + /// This function fills it with `Default::default()`, which is less performant than + /// using uninitialized memory, but safe. + pub fn alloc_array(buffer: &mut Buffer, array_size: usize) -> WriteResult { + let position = buffer.reserve(array_size * size!(T)); + + Ok(Self { + position: position as u32, + array_size, + phantom: std::marker::PhantomData, + }) + } + + /// Write actual values in the buffer-slot we got during `alloc()` + #[inline] + pub fn set_value_at(&mut self, buffer: &mut Buffer, val: T, index: usize) -> WriteResult<()> { + Ok(buffer + .write_at(self.position as usize + size!(T) * index, val) + .map(|_sz| ())?) + } + + #[inline] + pub fn location(&self) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: (self.array_size * size!(T)) as u32, + rva: self.position, + } + } + + #[inline] + pub fn location_of_index(&self, idx: usize) -> MDLocationDescriptor { + MDLocationDescriptor { + data_size: size!(T) as u32, + rva: self.position + (size!(T) * idx) as u32, + } + } +} + +pub fn write_string_to_location( + buffer: &mut Buffer, + text: &str, +) -> WriteResult { + let letters: Vec = text.encode_utf16().collect(); + + // First write size of the string (x letters in u16, times the size of u16) + let text_header = MemoryWriter::::alloc_with_val( + buffer, + (letters.len() * std::mem::size_of::()).try_into()?, + )?; + + // Then write utf-16 letters after that + let mut text_section = MemoryArrayWriter::::alloc_array(buffer, letters.len())?; + for (index, letter) in letters.iter().enumerate() { + text_section.set_value_at(buffer, *letter, index)?; + } + + let mut location = text_header.location(); + location.data_size += text_section.location().data_size; + + Ok(location) +} From d777ecc06a3017d767bbdff43f135305a9331493 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 12 Apr 2022 11:50:58 +0200 Subject: [PATCH 02/53] Begin fleshing out MacOS implementation --- Cargo.toml | 4 + src/lib.rs | 4 + src/mac.rs | 3 + src/mac/errors.rs | 20 ++ src/mac/minidump_writer.rs | 161 +++++++++++++++ src/mac/streams.rs | 7 + src/mac/streams/memory_list.rs | 76 +++++++ src/mac/streams/module_list.rs | 262 ++++++++++++++++++++++++ src/mac/streams/system_info.rs | 258 ++++++++++++++++++++++++ src/mac/streams/thread_list.rs | 351 +++++++++++++++++++++++++++++++++ 10 files changed, 1146 insertions(+) create mode 100644 src/mac.rs create mode 100644 src/mac/errors.rs create mode 100644 src/mac/minidump_writer.rs create mode 100644 src/mac/streams.rs create mode 100644 src/mac/streams/memory_list.rs create mode 100644 src/mac/streams/module_list.rs create mode 100644 src/mac/streams/system_info.rs create mode 100644 src/mac/streams/thread_list.rs diff --git a/Cargo.toml b/Cargo.toml index 1b8bd443..272835c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,5 +41,9 @@ features = [ "Win32_System_Threading", ] +[target.'cfg(target_os = "macos")'.dependencies] +# Binds some additional mac specifics not in libc +mach2 = "0.4" + [dev-dependencies] minidump = "0.10" diff --git a/src/lib.rs b/src/lib.rs index da5ef8ea..13c5f82e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,10 @@ cfg_if::cfg_if! { mod windows; pub use windows::*; + } else if #[cfg(target_os = "macos")] { + mod mac; + + pub use mac::*; } } diff --git a/src/mac.rs b/src/mac.rs new file mode 100644 index 00000000..aaa7f530 --- /dev/null +++ b/src/mac.rs @@ -0,0 +1,3 @@ +pub mod errors; +pub mod minidump_writer; +mod streams; diff --git a/src/mac/errors.rs b/src/mac/errors.rs new file mode 100644 index 00000000..47536942 --- /dev/null +++ b/src/mac/errors.rs @@ -0,0 +1,20 @@ +use thiserror::Error; + +use mach2::kern_return::kern_return_t; + +#[derive(Debug, Error)] +pub enum WriterError { + #[error("kernel error ({})", _0)] + Kernel(kern_return_t), +} + +#[inline] +pub(crate) fn kern_ret(func: impl FnOnce() -> kern_return_t) -> Result<(), WriterError> { + let res = func(); + + if res == KERN_SUCCESS { + Ok(()) + } else { + Err(WriterError::Kerne(res)) + } +} diff --git a/src/mac/minidump_writer.rs b/src/mac/minidump_writer.rs new file mode 100644 index 00000000..ea8484cc --- /dev/null +++ b/src/mac/minidump_writer.rs @@ -0,0 +1,161 @@ +use crate::mac::errors::WriterError; +use crash_context::CrashContext; +use std::io::{Seek, Write}; + +pub type DumpBuf = Buffer; +type Result = std::result::Result; + +#[derive(Debug)] +pub struct DirSection<'a, W> +where + W: Write + Seek, +{ + curr_idx: usize, + section: MemoryArrayWriter, + /// If we have to append to some file, we have to know where we currently are + destination_start_offset: u64, + destination: &'a mut W, + last_position_written_to_file: u64, +} + +impl<'a, W> DirSection<'a, W> +where + W: Write + Seek, +{ + fn new( + buffer: &mut DumpBuf, + index_length: u32, + destination: &'a mut W, + ) -> std::result::Result { + let dir_section = + MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; + Ok(DirSection { + curr_idx: 0, + section: dir_section, + destination_start_offset: destination.seek(SeekFrom::Current(0))?, + destination, + last_position_written_to_file: 0, + }) + } + + fn position(&self) -> u32 { + self.section.position + } + + fn dump_dir_entry( + &mut self, + buffer: &mut DumpBuf, + dirent: MDRawDirectory, + ) -> std::result::Result<(), FileWriterError> { + self.section.set_value_at(buffer, dirent, self.curr_idx)?; + + // Now write it to file + + // First get all the positions + let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; + let idx_pos = self.section.location_of_index(self.curr_idx); + self.curr_idx += 1; + + self.destination.seek(std::io::SeekFrom::Start( + self.destination_start_offset + idx_pos.rva as u64, + ))?; + let start = idx_pos.rva as usize; + let end = (idx_pos.rva + idx_pos.data_size) as usize; + self.destination.write_all(&buffer[start..end])?; + + // Reset file-position + self.destination + .seek(std::io::SeekFrom::Start(curr_file_pos))?; + + Ok(()) + } + + /// Writes 2 things to file: + /// 1. The given dirent into the dir section in the header (if any is given) + /// 2. Everything in the in-memory buffer that was added since the last call to this function + fn write_to_file( + &mut self, + buffer: &mut DumpBuf, + dirent: Option, + ) -> std::result::Result<(), FileWriterError> { + if let Some(dirent) = dirent { + self.dump_dir_entry(buffer, dirent)?; + } + + let start_pos = self.last_position_written_to_file as usize; + self.destination.write_all(&buffer[start_pos..])?; + self.last_position_written_to_file = buffer.position(); + Ok(()) + } +} + +pub struct MinidumpWriter { + /// The crash context as captured by an exception handler + crash_context: crash_context::CrashContext, + /// List of raw blocks of memory we've written into the stream. These are + /// referenced by other streams (eg thread list) + memory_blocks: Vec, +} + +impl MinidumpWriter { + /// Creates a minidump writer + pub fn new(crash_context: crash_context::CrashContext) -> Self { + Self { + crash_context, + memory_blocks: Vec::new(), + } + } + + pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result> { + let writers = { + let mut writers = vec![ + Self::write_thread_list, + Self::write_memory_list, + Self::write_system_info, + Self::write_module_list, + Self::write_misc_info, + Self::write_breakpad_info, + ]; + + // Exception stream needs to be the last entry in this array as it may + // be omitted in the case where the minidump is written without an + // exception. + if self.crash_context.exception.is_some() { + writers.push_back(Self::write_exception); + } + + writers + }; + + let num_writers = writers.len() as u32; + let mut buffer = Buffer::with_capacity(0); + + let mut header_section = MemoryWriter::::alloc(buffer)?; + let mut dir_section = DirSection::new(buffer, num_writers, destination)?; + + let header = MDRawHeader { + signature: MD_HEADER_SIGNATURE, + version: MD_HEADER_VERSION, + stream_count: num_writers, + stream_directory_rva: dir_section.position(), + checksum: 0, /* Can be 0. In fact, that's all that's + * been found in minidump files. */ + time_date_stamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH)? + .as_secs() as u32, // TODO: This is not Y2038 safe, but thats how its currently defined as + flags: 0, + }; + header_section.set_value(buffer, header)?; + + // Ensure the header gets flushed. If we crash somewhere below, + // we should have a mostly-intact dump + dir_section.write_to_file(buffer, None)?; + + for writer in writers { + let dirent = writer(self, buffer, dumper)?; + dir_section.write_to_file(buffer, Some(dirent))?; + } + + Ok(buffer) + } +} diff --git a/src/mac/streams.rs b/src/mac/streams.rs new file mode 100644 index 00000000..4d149fd6 --- /dev/null +++ b/src/mac/streams.rs @@ -0,0 +1,7 @@ +mod memory_list; +mod module_list; +mod system_info; +mod thread_list; + +use super::minidump_writer::{DumpBuf, MinidumpWriter}; +use crate::mac::errors::ker_ret; diff --git a/src/mac/streams/memory_list.rs b/src/mac/streams/memory_list.rs new file mode 100644 index 00000000..f782cd93 --- /dev/null +++ b/src/mac/streams/memory_list.rs @@ -0,0 +1,76 @@ +use super::*; + +impl MiniDumpWriter { + fn write_memory_list(&mut self, buffer: &mut DumpBuf) -> Result { + // Include some memory around the instruction pointer if the crash was + // due to an exception + const IP_MEM_SIZE: usize = 256; + + if self.crash_context.exc_info.is_some() { + let mut thread_state = thread_list_stream::ThreadState::default(); + // SAFETY: syscall + if unsafe { + mach2::thread_act::thread_get_state( + tid, + THREAD_STATE_FLAVOR, + thread_state.state.as_mut_ptr(), + &mut thread_state.state_size, + ) + } == mach2::kern_return::KERN_SUCCESS + { + } else { + None + } + + let get_ip_block = |task, tid| -> Option { + let thread_state = Self::get_thread_state(tid).ok()?; + + let ip = thread_state.pc(); + + // Bound it to the upper and lower bounds of the region + // it's contained within. If it's not in a known memory region, + // don't bother trying to write it. + let region = self.get_vm_region(ip).ok()?; + + if ip < region.start || ip > region.end { + return None; + } + + // Try to get IP_MEM_SIZE / 2 bytes before and after the IP, but + // settle for whatever's available. + let start = std::cmp::max(region.start, ip - IP_MEM_SIZE / 2); + let end = std::cmp::min(ip + IP_MEM_SIZE / 2, region.end); + + Some(start..end) + }; + + if let Some(ip_range) = get_ip_block() { + let size = ip_range.end - ip_range.start; + let stack_buffer = self.read_task_memory(ip_range.start as _, size)?; + let ip_location = MDLocationDescriptor { + data_size: size as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer)?; + + self.memory_blocks.push(MDMemoryDescriptor { + start_of_memory_range: ip_range.start, + memory: ip_location, + }); + } + } + + let list_header = + MemoryWriter::::alloc_with_val(buffer, self.memory_blocks.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::MemoryListStream as u32, + location: list_header.location(), + }; + + let block_list = + MemoryArrayWriter::::alloc_from_array(buffer, &self.memory_blocks)?; + + dirent.location.data_size += block_list.location().data_size; + } +} diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs new file mode 100644 index 00000000..e603721a --- /dev/null +++ b/src/mac/streams/module_list.rs @@ -0,0 +1,262 @@ +use super::*; + +#[cfg(target_pointer_width = "32")] +compile_error!("this module assumes a 64-bit pointer width"); + +fn all_image_addr(task: mach2::mach_types::task_name_t) -> Option { + let mut task_dyld_info = std::mem::MaybeUninit::::uninit(); + let mut count = std::mem::size_of::() + / std::mem::size_of::(); + + // SAFETY: syscall + kern_ret(|| unsafe { + mach2::task::task_info( + task, + mach2::task_info::TASK_DYLD_INFO, + task_dyld_info.as_mut_ptr().cast(), + &mut count, + ) + }) + .ok()?; + + Some(task_dyld_info.all_image_info_addr) +} + +impl MiniDumpWriter { + fn write_module_list(&mut self, buffer: &mut DumpBuf) -> Result { + let modules = if let Some(all_images) = all_image_addr(self.crash_context.task) { + + } else { + vec![] + }; + + let list_header = MemoryWriter::::alloc_with_val(buffer, modules.len() as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ModuleListStream as u32, + location: list_header.location(), + }; + + if !modules.is_empty() { + let mapping_list = MemoryArrayWriter::::alloc_from_iter(buffer, modules)?; + dirent.location.data_size += mapping_list.location().data_size; + } + + Ok(dirent) + } + + fn read_loaded_images(&self, all_images_addr: u64) -> Result, WriterError> { + // Read the structure inside of dyld that contains information about + // loaded images. We're reading from the desired task's address space. + + // dyld_all_image_infos defined in usr/include/mach-o/dyld_images.h, we + // only need a couple of fields at the beginning + #[repr(C)] + struct AllImagesInfo { + version: u32, // == 1 in Mac OS X 10.4 + info_array_count: u32, + info_array_addr: u64, + } + + // dyld_image_info + #[repr(C)] + struct ImageInfo { + load_address: u64, + file_path: u64, + file_mod_date: u64, + } + + // usr/include/mach-o/loader.h + #[repr(C)] + struct MachHeader { + magic: u32, // mach magic number identifier + cpu_type: i32, // cpu_type_t cpu specifier + cpu_sub_type: i32, // cpu_subtype_t machine specifier + file_type: u32, // type of file + num_commands: u32, // number of load commands + size_commands: u32, // size of all the load commands + flags: u32, + __reserved: u32, + } + + // Here we make the assumption that dyld loaded at the same address in + // the crashed process vs. this one. This is an assumption made in + // "dyld_debug.c" and is said to be nearly always valid. + let dyld_all_info_buf = self.read_task_memory(all_images_addr, std::mem::size_of::())?; + let dyld_info: &AllImagesInfo = &*(dyld_all_info_buf.cast()); + + let dyld_info_buf = self.read_task_memory(dyld_info.info_array_addr, dyld_info.info_array_count * std::mem::size_of::())?; + + let all_images = unsafe { + std::slice::from_raw_parts(dyld_info.buf.as_ptr().cast::(), dyld_info.info_array_count as usize) + }; + + let mut images = Vec::with_capacity(all_images.len(); + + for image in all_images { + let mach_header_buf = if let Ok(buf) = self.read_task_memory(image.load_address, std::mem::size_of::()) { + buf + } else { + continue; + }; + + let header: &MachHeader = &*(mach_header_buf.cast()); + //let header_size = std::mem::size_of::() + header.size_commands; + + let file_path = if image.file_path != 0 { + } + } + + for (int i = 0; i < count; ++i) { + dyld_image_info& info = infoArray[i]; + + // First read just the mach_header from the image in the task. + vector mach_header_bytes; + if (ReadTaskMemory(images.task_, + info.load_address_, + sizeof(mach_header_type), + mach_header_bytes) != KERN_SUCCESS) + continue; // bail on this dynamic image + + mach_header_type* header = + reinterpret_cast(&mach_header_bytes[0]); + + // Now determine the total amount necessary to read the header + // plus all of the load commands. + size_t header_size = + sizeof(mach_header_type) + header->sizeofcmds; + + if (ReadTaskMemory(images.task_, + info.load_address_, + header_size, + mach_header_bytes) != KERN_SUCCESS) + continue; + + // Read the file name from the task's memory space. + string file_path; + if (info.file_path_) { + // Although we're reading kMaxStringLength bytes, it's copied in the + // the DynamicImage constructor below with the correct string length, + // so it's not really wasting memory. + file_path = ReadTaskString(images.task_, info.file_path_); + } + + // Create an object representing this image and add it to our list. + DynamicImage* new_image; + new_image = new DynamicImage(&mach_header_bytes[0], + header_size, + info.load_address_, + file_path, + static_cast(info.file_mod_date_), + images.task_, + images.cpu_type_); + + if (new_image->IsValid()) { + images.image_list_.push_back(DynamicImageRef(new_image)); + } else { + delete new_image; + } + } + + // sorts based on loading address + sort(images.image_list_.begin(), images.image_list_.end()); + // remove duplicates - this happens in certain strange cases + // You can see it in DashboardClient when Google Gadgets plugin + // is installed. Apple's crash reporter log and gdb "info shared" + // both show the same library multiple times at the same address + + vector::iterator it = unique(images.image_list_.begin(), + images.image_list_.end()); + images.image_list_.erase(it, images.image_list_.end()); + } + + fn read_string(&self, addr: u64) -> Result { + // The problem is we don't know how much to read until we know how long + // the string is. And we don't know how long the string is, until we've read + // the memory! So, we'll try to read kMaxStringLength bytes + // (or as many bytes as we can until we reach the end of the vm region). + let size_to_end = { + let mut region_base = addr; + let mut region_size = 0; + let mut nesting_level = 0; + vm_region_submap_info_64 submap_info; + mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64; + + // Get information about the vm region containing |address| + vm_region_recurse_info_t region_info; + region_info = reinterpret_cast(&submap_info); + + kern_return_t result = + mach_vm_region_recurse(target_task, + ®ion_base, + ®ion_size, + &nesting_level, + region_info, + &info_count); + + if (result == KERN_SUCCESS) { + // Get distance from |address| to the end of this region + *size_to_end = region_base + region_size -(mach_vm_address_t)address; + + // If we want to handle strings as long as 4096 characters we may need + // to check if there's a vm region immediately following the first one. + // If so, we need to extend |*size_to_end| to go all the way to the end + // of the second region. + if (*size_to_end < 4096) { + // Second region starts where the first one ends + mach_vm_address_t region_base2 = + (mach_vm_address_t)(region_base + region_size); + mach_vm_size_t region_size2; + + // Get information about the following vm region + result = + mach_vm_region_recurse(target_task, + ®ion_base2, + ®ion_size2, + &nesting_level, + region_info, + &info_count); + + // Extend region_size to go all the way to the end of the 2nd region + if (result == KERN_SUCCESS + && region_base2 == region_base + region_size) { + region_size += region_size2; + } + } + + *size_to_end = region_base + region_size -(mach_vm_address_t)address; + } else { + region_size = 0; + *size_to_end = 0; + } + + return region_size; + }; + mach_vm_size_t size_to_end; + GetMemoryRegionSize(target_task, address, &size_to_end); + + if (size_to_end > 0) { + mach_vm_size_t size_to_read = + size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end; + + vector bytes; + if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) != + KERN_SUCCESS) + return string(); + + //============================================================================== + // Returns the size of the memory region containing |address| and the + // number of bytes from |address| to the end of the region. + // We potentially, will extend the size of the original + // region by the size of the following region if it's contiguous with the + // first in order to handle cases when we're reading strings and they + // straddle two vm regions. + // + static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task, + const uint64_t address, + mach_vm_size_t* size_to_end) { + + } + } + } +} diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs new file mode 100644 index 00000000..b20ba27b --- /dev/null +++ b/src/mac/streams/system_info.rs @@ -0,0 +1,258 @@ +use super::*; +use crate::minidump_format::*; + +fn sysctl_by_name(name: &[u8]) -> T { + let mut out = T::default(); + let mut len = std::mem::size_of_val(&out); + + // SAFETY: syscall + unsafe { + if libc::sysctlbyname( + name.as_ptr().cast(), + (&mut out).cast(), + &mut len, + std::ptr::null_mut(), + 0, + ) != 0 + { + // log? + T::default() + } else { + out + } + } +} + +fn int_sysctl_by_name + Default>(name: &[u8]) -> T { + let val = sysctl_by_name::(name); + T::try_from(val).unwrap_or_default() +} + +fn sysctl_string(name: &[u8]) -> String { + let mut buf_len = 0; + + // SAFETY: syscalls + let string_buf = unsafe { + // Retrieve the size of the string (including null terminator) + if libc::sysctlbyname( + name.as_ptr().cast(), + std::ptr::null_mut(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + || buf_len <= 1 + { + return String::new(); + } + + let mut buff = Vec::new(); + buff.resize(buf_len, 0); + + if libc::sysctlbyname( + name.as_ptr().cast(), + buff.as_mut_ptr().cast(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + { + return String::new(); + } + + buff.pop(); // remove null terminator + buff + }; + + String::from_utf8(string_buf).unwrap_or_default() +} + +/// Retrieve the OS version information. +/// +/// Note that this only works on 10.13.4+, but that release is over 4 years old +/// and 1 version behind the latest unsupported release at the time of this writing +/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionProductVersionKey +fn os_version() -> (u32, u32, u32) { + let vers = sysctl_string(b"kern.osproductversion\0"); + + let inner = || { + let mut it = vers.split('.'); + + let major: u32 = it.next()?.parse().ok()?; + let minor: u32 = it.next()?.parse().ok()?; + let patch: u32 = it.next().and_then(|p| p.parse().ok()).unwrap_or_default(); + + Some((major, minor, patch)) + }; + + inner().unwrap_or_default() +} + +/// Retrieves the OS build version. +/// +/// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this +/// via _CFCopySystemVersionDictionary->_kCFSystemVersionBuildVersionKey. I have +/// no idea how long this has been the case, but the same information can be +/// retrieved via `sysctlbyname` via the `kern.osversion` key as seen by comparing +/// its value versus the output of the `sw_vers -buildVersion` command +#[inline] +fn build_version() -> String { + sysctl_string(b"kern.osversion\0") +} + +/// Retrieves more detailed information on the cpu. +/// +/// Note that this function is only implemented on `x86_64` as Apple doesn't +/// expose similar info on `aarch64` (or at least, not via the same mechanisms) +fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { + if !cfg!(target_arch = "x86_64") { + return; + } + + let mut md_feats = 1 << 2 /*PF_COMPARE_EXCHANGE_DOUBLE*/; + let features: u64 = sysctl_by_name(b"machdep.cpu.feature_bits\0"); + + // Map the cpuid feature to its equivalent minidump cpu feature. + // See https://en.wikipedia.org/wiki/CPUID for where the values for the + // various cpuid bits come from, and + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // for where the bits for the the minidump come from + macro_rules! map_feature { + ($set:expr, $cpuid_bit:expr, $md_bit:expr) => { + if $set & (1 << $cpuid_bit) != 0 { + md_feats |= 1 << $md_bit; + } + }; + } + + map_feature!( + features, 4, /*TSC*/ + 8 /* PF_RDTSC_INSTRUCTION_AVAILABLE */ + ); + map_feature!(features, 6 /*PAE*/, 9 /* PF_PAE_ENABLED */); + map_feature!( + features, 23, /*MMX*/ + 3 /* PF_MMX_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 25, /*SSE*/ + 6 /* PF_XMMI_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 26, /*SSE2*/ + 10 /* PF_XMMI64_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 32, /*SSE3*/ + 13 /* PF_SSE3_INSTRUCTIONS_AVAILABLE */ + ); + map_feature!( + features, 45, /*CX16*/ + 14 /* PF_COMPARE_EXCHANGE128 */ + ); + map_feature!(features, 58 /*XSAVE*/, 17 /* PF_XSAVE_ENABLED */); + map_feature!( + features, 62, /*RDRAND*/ + 28 /* PF_RDRAND_INSTRUCTION_AVAILABLE */ + ); + + let ext_features: u64 = sysctl_by_name(b"machdep.cpu.extfeature_bits\0"); + + map_feature!( + ext_features, + 27, /* RDTSCP */ + 32 /* PF_RDTSCP_INSTRUCTION_AVAILABLE */ + ); + map_feature!( + ext_features, + 31, /* 3DNOW */ + 7 /* PF_3DNOW_INSTRUCTIONS_AVAILABLE */ + ); + + let leaf_features: u32 = sysctl_by_name(b"machdep.cpu.leaf7_feature_bits\0"); + map_feature!( + leaf_features, + 0, /* F7_FSGSBASE */ + 22 /* PF_RDWRFSGSBASE_AVAILABLE */ + ); + + // In newer production kernels, NX is always enabled. + // See 10.15.0 xnu-6153.11.26/osfmk/x86_64/pmap.c nx_enabled. + md_feats |= 1 << 12 /* PF_NX_ENABLED */; + + // All CPUs that Apple is known to have shipped should support DAZ. + md_feats |= 1 << 11 /* PF_SSE_DAZ_MODE_AVAILABLE */; + + // minidump_common::format::OtherCpuInfo is just 2 adjacent u64's, we only + // set the first, so just do a direct write to the bytes + cpu[..std::mem::size_of::()].copy_from_slice(md_feats.to_ne_bytes()); +} + +impl MiniDumpWriter { + fn write_system_info(&mut self, buffer: &mut DumpBuf) -> Result { + let mut info_section = MemoryWriter::::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::SystemInfoStream as u32, + location: info_section.location(), + }; + + let number_of_processors: u8 = int_sysctl_by_name(b"hw.ncpu\0"); + // SAFETY: POD buffer + let mut cpu: format::CPU_INFORMATION = unsafe { std::mem::zeroed() }; + read_cpu_info(&mut cpu); + + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_AMD64; + + // machdep.cpu.family and machdep.cpu.model already take the extended family + // and model IDs into account. See 10.9.2 xnu-2422.90.20/osfmk/i386/cpuid.c + // cpuid_set_generic_info(). + let processor_level: u16 = int_sysctl_by_name(b"machdep.cpu.family\0"); + let model: u8 = int_sysctl_by_name(b"machdep.cpu.model\0"); + let stepping: u8 = int_sysctl_by_name(b"machdep.cpu.stepping\0"); + + let processor_revision: u16 = (model << 8) | stepping; + } else if #[cfg(target_arch = "aarch64")] { + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64; + + let family: u32 = sysctl_by_name(b"hw.cpufamily\0"); + + let processor_level = (family & 0xffff0000 >> 16) as u16; + let processor_revision = (family & 0x0000ffff) as u16; + } else { + compile_error!("unsupported target architecture"); + } + } + + let (major_version, minor_version, build_number) = os_version(); + let os_version_loc = write_string_to_location(buffer, &build_version())?; + + let info = MDRawSystemInfo { + // CPU + processor_architecture: processor_architecture as u16, + processor_level, + processor_revision, + number_of_processors, + product_type, + cpu, + + // OS + platform_id: PlatformId::MacOs, + product_type: 1, // VER_NT_WORKSTATION, could also be VER_NT_SERVER but...seriously? + major_version, + minor_version, + build_number, + csd_version_rva: os_version_loc.rva, + + suite_mask: 0, + reserved2: 0, + }; + + info_section.set_value(buffer, info)?; + + Ok(dirent) + } +} diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs new file mode 100644 index 00000000..6e54f834 --- /dev/null +++ b/src/mac/streams/thread_list.rs @@ -0,0 +1,351 @@ +use super::*; + +// From /usr/include/mach/machine/thread_state.h +const THREAD_STATE_MAX: usize = 1296; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h + const THREAD_STATE_FLAVOR: u32 = 4; + } else if #[cfg(target_arch = "aarch64")] { + /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h + const THREAD_STATE_FLAVOR: u32 = 6; + + // Missing from mach2 atm + // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h + #[repr(C)] + struct Arm64ThreadState { + x: [u64; 29], + fp: u64, + lr: u64, + sp: u64, + pc: u64, + cpsr: u32, + __pad: u32, + } + } +} + +struct ThreadState { + state: [u32; THREAD_STATE_MAX], + state_size: u32, +} + +impl Default for ThreadState { + fn default() -> Self { + Self { + state: [0u32; THREAD_STATE_MAX], + state_size: THREAD_STATE_MAX * std::mem::size_of::() as u32, + } + } +} + +impl ThreadState { + pub fn pc(&self) -> u64 { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let x86_64_state: &mach2::structs::x86_thread_state64_t = &*(thread_state.state.as_ptr().cast()); + x86_64_state.__pc + } else if #[cfg(target_arch = "aarch64")] { + let aarch64_state: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + aarch64_state.pc + } + } + } +} + +pub(crate) struct VMRegionInfo { + pub(crate) info: mach2::vm_region::vm_region_submap_info_64, + pub(crate) range: std::ops::Range, +} + +impl MinidumpWriter { + fn write_thread_list(&mut self, buffer: &mut DumpBuf) -> Result { + // Retrieve the list of threads from the task that crashed. + // SAFETY: syscall + let mut threads = std::ptr::null_mut(); + let mut thread_count = 0; + + kern_ret(|| unsafe { + mach2::task::task_threads(self.crash_context.task, &mut threads, &mut thread_count) + })?; + + // Ignore the thread that handled the exception + if self.crash_context.handler_thread != mach2::port::MACH_PORT_NULL { + thread_count -= 1; + } + + let list_header = MemoryWriter::::alloc_with_val(buffer, thread_count as u32)?; + + let mut dirent = MDRawDirectory { + stream_type: MDStreamType::ThreadListStream as u32, + location: list_header.location(), + }; + + let mut thread_list = MemoryArrayWriter::::alloc_array(buffer, num_threads)?; + dirent.location.data_size += thread_list.location().data_size; + + let threads = unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }; + + for (i, tid) in threads.iter().enumerate() { + let thread = self.write_thread(buffer, tid)?; + thread_list.set_value_at(buffer, thread, i)?; + } + + Ok(dirent) + } + + fn write_thread(&mut self, buffer: &mut DumpBuf, tid: u32) -> Result { + let mut thread = MDRawThread { + thread_id: tid, + suspend_count: 0, + priority_class: 0, + priority: 0, + teb: 0, + stack: MDMemoryDescriptor::default(), + thread_context: MDLocationDescriptor::default(), + }; + + let thread_state = Self::get_thread_state(tid)?; + + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let x86_64_state: &mach2::structs::x86_thread_state64_t = &*(thread_state.state.as_ptr().cast()); + + self.write_stack_from_start_address(x86_64_state.__rsp, buffer, &mut thread)?; + } else if #[cfg(target_arch = "aarch64")] { + let aarch64_state: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + self.write_stack_from_start_address(aarch64_state.sp, buffer, &mut thread)?; + } else { + compile_error!("unsupported target arch"); + } + } + + let mut cpu: RawContextCPU = Default::default(); + Self::fill_cpu_context(thread_state, &mut cpu); + let cpu_section = MemoryWriter::alloc_with_val(buffer, cpu)?; + thread.thread_context = cpu_section.location(); + Ok(thread) + } + + fn get_thread_state(tid: u32) -> Result { + let mut thread_state = ThreadState::default(); + + // SAFETY: syscall + kern_ret(|| unsafe { + mach2::thread_act::thread_get_state( + tid, + THREAD_STATE_FLAVOR, + thread_state.state.as_mut_ptr(), + &mut thread_state.state_size, + ) + })?; + + Ok(thread_state) + } + + fn write_stack_from_start_address( + &mut self, + start: u64, + buffer: &mut DumpBuf, + thread: &mut MDRawThread, + ) -> Result<(), WriterError> { + thread.stack.start_of_memory_range = start.try_into()?; + thread.stack.memory.data_size = 0; + thread.stack.memory.rva = buffer.position() as u32; + + let stack_size = self.calculate_stack_size(start); + + let stack_location = if stack_size == 0 { + // In some situations the stack address for the thread can come back 0. + // In these cases we skip over the threads in question and stuff the + // stack with a clearly borked value. + thread.stack.start_of_memory_range = 0xdeadbeef; + + let stack_location = MDLocationDescriptor { + data_size: 16, + rva: buffer.position() as u32, + }; + buffer.write_all(0xdeadbeefu64.as_ne_bytes())?; + buffer.write_all(0xdeadbeefu64.as_ne_bytes())?; + stack_location + } else { + let stack_buffer = self.read_task_memory(start, stack_size)?; + let stack_location = MDLocationDescriptor { + data_size: stack_buffer.len() as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer)?; + stack_location + }; + + thread.stack.memory = stack_location; + self.memory_blocks.push(thread.stack); + Ok(()) + } + + fn calculate_stack_size(&self, start_address: u64) -> usize { + if start_address == 0 { + return 0; + } + + let mut region = if let Ok(region) = self.get_vm_region(start_address) { + region + } else { + return 0; + }; + + // Failure or stack corruption, since mach_vm_region had to go + // higher in the process address space to find a valid region. + if start_address < region.range.start { + return 0; + } + + // If the user tag is VM_MEMORY_STACK, look for more readable regions with + // the same tag placed immediately above the computed stack region. Under + // some circumstances, the stack for thread 0 winds up broken up into + // multiple distinct abutting regions. This can happen for several reasons, + // including user code that calls setrlimit(RLIMIT_STACK, ...) or changes + // the access on stack pages by calling mprotect. + if region.info.user_tag == mach2::vm_statistics::VM_MEMORY_STACK { + loop { + let proposed_next_region_base = region.range.end; + + region = if let Ok(reg) = self.get_vm_region(region.range.end) { + reg + } else { + break; + }; + + if region.range.start != proposed_next_region_base + || region.info.user_tag != mach2::vm_statistics::VM_MEMORY_STACK + || (region.info.protection & mach2::vm_prot::VM_PROT_READ) == 0 + { + break; + } + + stack_region_size += region.range.end - region.range.start; + } + } + + stack_region_base + stack_region_size - start_addr + } + + fn read_task_memory(&self, address: u64, length: usize) -> Result, WriterError> { + let sys_page_size = libc::getpagesize(); + + // use the negative of the page size for the mask to find the page address + let page_address = address & (-sys_page_size); + let last_page_address = (address + length + (sys_page_size - 1)) & (-sys_page_size); + + let page_size = last_page_address - page_address; + let mut local_start = std::ptr::null_mut(); + let mut local_length = 0; + + kern_ret(|| unsafe { + mach2::vm::mach_vm_read( + self.crash_context.task, + page_address, + page_size, + &mut local_start, + &mut local_length, + ) + })?; + + let mut buffer = Vec::with_capacity(length); + + let task_buffer = + std::slice::from_raw_parts(local_start.offset(address - page_address), length); + buffer.extend_from_slice(task_buffer); + + // Don't worry about the return here, if something goes wrong there's probably + // not much we can do about, and we have what we want anyways + mach2::vm::mach_vm_deallocate(mach2::traps::mach_task_self(), local_start, local_length); + + Ok(buffer) + } + + fn fill_cpu_context(thread_state: &ThreadState, out: &mut RawContextCPU) { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + out.context_flags = format::ContextFlagsCpu::CONTEXT_AMD64.bits(); + + let ts: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + + out.rax = ts.__rax; + out.rbx = ts.__rbx; + out.rcx = ts.__rcx; + out.rdx = ts.__rdx; + out.rdi = ts.__rdi; + out.rsi = ts.__rsi; + out.rbp = ts.__rbp; + out.rsp = ts.__rsp; + out.r8 = ts.__r8; + out.r9 = ts.__r9; + out.r10 = ts.__r10; + out.r11 = ts.__r11; + out.r12 = ts.__r12; + out.r13 = ts.__r13; + out.r14 = ts.__r14; + out.r15 = ts.__r15; + out.rip = ts.__rip; + // according to AMD's software developer guide, bits above 18 are + // not used in the flags register. Since the minidump format + // specifies 32 bits for the flags register, we can truncate safely + // with no loss. + out.eflags = ts.__rflags as _; + out.cs = ts.__cs; + out.fs = ts.__fs; + out.gs = ts.__gs; + } else if #[cfg(target_arch = "aarch64")] { + // This is kind of a lie as we don't actually include the full float state..? + out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; + + let ts: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + + out.cpsr = ts.cpsr; + out.iregs[..28].copy_from_slice(&ts.x[..28]); + out.iregs[29] = ts.fp; + out.iregs[30] = ts.lr; + out.sp = ts.sp; + out.pc = ts.pc; + } else { + compile_error!("unsupported target arch"); + } + } + } + + fn get_vm_region(&self, addr: u64) -> Result { + let mut region_base = addr; + let mut region_size = 0; + let mut nesting_level = 0; + let mut region_info = 0; + let mut submap_info = std::mem::MaybeUninit::::uninit(); + + // mach/vm_region.h + const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = + (std::mem::size_of::() + / std::mem::size_of::()) as u32; + + let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; + + kern_ret(|| + // SAFETY: syscall + unsafe { + mach2::vm::mach_vm_region_recurse( + self.crash_context.task, + &mut region_base, + &mut region_size, + &mut nesting_level, + submap_info.as_mut_ptr().cast(), + &mut info_count, + ) + })?; + + Ok(VMRegionInfo { + // SAFETY: this will be valid if the syscall succeeded + info: unsafe { submap_info.assume_init() }, + range: region_base..region_base + region_base, + }) + } +} From 977ebb221c7d6cef9a40abb52cb089dda973e82f Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 12 Apr 2022 17:52:53 +0200 Subject: [PATCH 03/53] Further fleshing out --- src/mac/errors.rs | 2 + src/mac/streams/module_list.rs | 446 ++++++++++++++++++++------------- 2 files changed, 274 insertions(+), 174 deletions(-) diff --git a/src/mac/errors.rs b/src/mac/errors.rs index 47536942..00f99008 100644 --- a/src/mac/errors.rs +++ b/src/mac/errors.rs @@ -6,6 +6,8 @@ use mach2::kern_return::kern_return_t; pub enum WriterError { #[error("kernel error ({})", _0)] Kernel(kern_return_t), + #[error("detected an invalid mach image header")] + InvalidMachHeader, } #[inline] diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index e603721a..ce61915d 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -22,10 +22,29 @@ fn all_image_addr(task: mach2::mach_types::task_name_t) -> Option { Some(task_dyld_info.all_image_info_addr) } +// dyld_image_info +#[repr(C)] +struct ImageInfo { + load_address: u64, + file_path: u64, + file_mod_date: u64, +} + +// usr/include/mach-o/loader.h, the file type for the main executable image +const MH_EXECUTE: u32 = 0x2; +// usr/include/mach-o/loader.h, magic number for MachHeader +const MH_MAGIC_64: u32 = 0xfeedfacf; +// usr/include/mach-o/loader.h, command to map a segment +const LC_SEGMENT_64: u32 = 0x19; +// usr/include/mach-o/loader.h, dynamically linked shared lib ident +const LC_ID_DYLIB: u32 = 0xd; +// usr/include/mach-o/loader.h, the uuid +const LC_UUID: u32 = 0x1b; + impl MiniDumpWriter { fn write_module_list(&mut self, buffer: &mut DumpBuf) -> Result { let modules = if let Some(all_images) = all_image_addr(self.crash_context.task) { - + self.read_loaded_modules(all_images)? } else { vec![] }; @@ -45,7 +64,7 @@ impl MiniDumpWriter { Ok(dirent) } - fn read_loaded_images(&self, all_images_addr: u64) -> Result, WriterError> { + fn read_loaded_modules(&self, all_images_addr: u64) -> Result { // Read the structure inside of dyld that contains information about // loaded images. We're reading from the desired task's address space. @@ -58,205 +77,284 @@ impl MiniDumpWriter { info_array_addr: u64, } - // dyld_image_info - #[repr(C)] - struct ImageInfo { - load_address: u64, - file_path: u64, - file_mod_date: u64, - } - - // usr/include/mach-o/loader.h - #[repr(C)] - struct MachHeader { - magic: u32, // mach magic number identifier - cpu_type: i32, // cpu_type_t cpu specifier - cpu_sub_type: i32, // cpu_subtype_t machine specifier - file_type: u32, // type of file - num_commands: u32, // number of load commands - size_commands: u32, // size of all the load commands - flags: u32, - __reserved: u32, - } - // Here we make the assumption that dyld loaded at the same address in // the crashed process vs. this one. This is an assumption made in // "dyld_debug.c" and is said to be nearly always valid. - let dyld_all_info_buf = self.read_task_memory(all_images_addr, std::mem::size_of::())?; + let dyld_all_info_buf = + self.read_task_memory(all_images_addr, std::mem::size_of::())?; let dyld_info: &AllImagesInfo = &*(dyld_all_info_buf.cast()); - let dyld_info_buf = self.read_task_memory(dyld_info.info_array_addr, dyld_info.info_array_count * std::mem::size_of::())?; + let dyld_info_buf = self.read_task_memory( + dyld_info.info_array_addr, + dyld_info.info_array_count * std::mem::size_of::(), + )?; let all_images = unsafe { - std::slice::from_raw_parts(dyld_info.buf.as_ptr().cast::(), dyld_info.info_array_count as usize) + std::slice::from_raw_parts( + dyld_info.buf.as_ptr().cast::(), + dyld_info.info_array_count as usize, + ) }; - - let mut images = Vec::with_capacity(all_images.len(); + + let mut images = Vec::with_capacity(all_images.len()); for image in all_images { - let mach_header_buf = if let Ok(buf) = self.read_task_memory(image.load_address, std::mem::size_of::()) { - buf + // Apparently MacOS will happily list the same image multiple times + // for some reason, so only add images once + let insert_index = if let Err(i) = + images.binary_search_by(|img| image.load_address.cmp(&img.load_address)) + { + i } else { continue; }; - let header: &MachHeader = &*(mach_header_buf.cast()); - //let header_size = std::mem::size_of::() + header.size_commands; - - let file_path = if image.file_path != 0 { + if let Ok(module) = self.read_module(image) { + images.insert(insert_index, module); } } - for (int i = 0; i < count; ++i) { - dyld_image_info& info = infoArray[i]; - - // First read just the mach_header from the image in the task. - vector mach_header_bytes; - if (ReadTaskMemory(images.task_, - info.load_address_, - sizeof(mach_header_type), - mach_header_bytes) != KERN_SUCCESS) - continue; // bail on this dynamic image - - mach_header_type* header = - reinterpret_cast(&mach_header_bytes[0]); - - // Now determine the total amount necessary to read the header - // plus all of the load commands. - size_t header_size = - sizeof(mach_header_type) + header->sizeofcmds; - - if (ReadTaskMemory(images.task_, - info.load_address_, - header_size, - mach_header_bytes) != KERN_SUCCESS) - continue; - - // Read the file name from the task's memory space. - string file_path; - if (info.file_path_) { - // Although we're reading kMaxStringLength bytes, it's copied in the - // the DynamicImage constructor below with the correct string length, - // so it's not really wasting memory. - file_path = ReadTaskString(images.task_, info.file_path_); - } - - // Create an object representing this image and add it to our list. - DynamicImage* new_image; - new_image = new DynamicImage(&mach_header_bytes[0], - header_size, - info.load_address_, - file_path, - static_cast(info.file_mod_date_), - images.task_, - images.cpu_type_); - - if (new_image->IsValid()) { - images.image_list_.push_back(DynamicImageRef(new_image)); - } else { - delete new_image; - } + // The modules are sorted by load address, but we always want the + // main executable to be first in the minidump + + Ok(images) } - // sorts based on loading address - sort(images.image_list_.begin(), images.image_list_.end()); - // remove duplicates - this happens in certain strange cases - // You can see it in DashboardClient when Google Gadgets plugin - // is installed. Apple's crash reporter log and gdb "info shared" - // both show the same library multiple times at the same address + fn read_module(&self, image: ImageInfo, buf: &mut DumpBuf) -> Result { + // usr/include/mach-o/loader.h + #[repr(C)] + struct MachHeader { + magic: u32, // mach magic number identifier + cpu_type: i32, // cpu_type_t cpu specifier + cpu_sub_type: i32, // cpu_subtype_t machine specifier + file_type: u32, // type of file + num_commands: u32, // number of load commands + size_commands: u32, // size of all the load commands + flags: u32, + __reserved: u32, + } + + // usr/include/mach-o/loader.h + #[repr(C)] + struct LoadCommand { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes + } + + /* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. + */ + #[repr(C)] + struct SegmentCommand64 { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes + segment_name: [u8; 16], + vm_addr: u64, // memory address the segment is mapped to + vm_size: u64, // total size of the segment + file_off: u64, // file offset of the segment + file_size: u64, // amount mapped from the file + max_prot: i32, // maximum VM protection + init_prot: i32, // initial VM protection + num_sections: u32, // number of sections in the segment + flags: u32, + } - vector::iterator it = unique(images.image_list_.begin(), - images.image_list_.end()); - images.image_list_.erase(it, images.image_list_.end()); + /* + * Dynamicly linked shared libraries are identified by two things. The + * pathname (the name of the library as found for execution), and the + * compatibility version number. The pathname must match and the compatibility + * number in the user of the library must be greater than or equal to the + * library being used. The time stamp is used to record the time a library was + * built and copied into user so it can be use to determined if the library used + * at runtime is exactly the same as used to built the program. + */ + #[repr(C)] + struct Dylib { + name: u32, // offset from the load command start to the pathname + timestamp: u32, // library's build time stamp + current_version: u32, // library's current version number + compatibility_version: u32, // library's compatibility vers number + } + + /* + * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) + * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. + * An object that uses a dynamically linked shared library also contains a + * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or + * LC_REEXPORT_DYLIB) for each library it uses. + */ + #[repr(C)] + struct DylibCommand { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes, including pathname string + dylib: Dylib, // library identification + } + + /* + * The uuid load command contains a single 128-bit unique random number that + * identifies an object produced by the static link editor. + */ + #[repr(C)] + struct UuidCommand { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes + uuid: [u8; 16], + } + + let mach_header_buf = + self.read_task_memory(image.load_address, std::mem::size_of::())?; + + let header: &MachHeader = &*(mach_header_buf.cast()); + + //let header_size = std::mem::size_of::() + header.size_commands; + + if header.magic != MH_MAGIC_64 { + return Err(WriterError::InvalidMachHeader); + } + + // Read the load commands which immediately follow the image header from + // the task memory + let load_commands_buf = self.read_task_memory( + image.load_address + std::mem::size_of::() as u64, + header.size_commands, + )?; + + // Loads commands vary in size depending on the actual type, so we have + // to manually update the pointer offset rather than just stuffing the + // buffer into a slice + let mut next_header = load_commands.buf.as_ptr(); + + struct ImageSizes { + vm_addr: u64, + vm_size: u64, + slide: isize, + } + + let mut image_sizes = None; + let mut image_version = None; + let mut image_uuid = None; + + // TODO: pullout the load command parsing to its own function for testing + for i in 0..header.num_commands { + let header = &*(next_header.cast::()); + + if image_sizes.is_none() && header.cmd == LC_SEGMENT_64 { + let seg: &SegmentCommand64 = &*(next_header.cast()); + + if seg.segment_name[..7] == b"__TEXT\0" { + let slide = if seg.file_off == 0 && seg.file_size != 0 { + image.load_address - seg.vm_addr + } else { + 0 + }; + + image_sizes = Some(ImageSizes { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + + if image_version.is_none() && header.cmd == LC_ID_DYLIB { + let seg: &DylibComand = &*(next_header.cast()); + + image_version = Some(seg.current_version); + } + + if image_uuid.is_none() && header.cmd == LC_UUID { + let seg: &UuidComand = &*(next_header.cast()); + image_uuid = Some(seg.uuid); + } + + if image_sizes.is_some() && image_version.is_some() { + break; + } + + next_header = next_header.offset(header.cmd_size as isize); + } + + let image_sizes = image_sizes.ok_or_else(|| WriterError::InvalidMachHeader)?; + + let file_path = if image.file_path != 0 { + self.read_string(image.file_path)?.unwrap_or_default() + } else { + String::new() + }; + + let module_name = write_string_to_location(buf, &file_path)?; + + let mut raw_module = MDRawModule { + base_of_image: image_sizes.vm_addr + image_sizes.slide, + size_of_image: image_sizes.vm_size as u32, + module_name_rva: module_name.rva, + ..Default::default() + }; + + // Version info is not available for the main executable image since + // it doesn't have a LC_ID_DYLIB load command + if let Some(version) = image_version { + raw_module.version_info.signature = format::VS_FFI_SIGNATURE; + raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; + + // Convert MAC dylib version format, which is a 32 bit number, to the + // format used by minidump. The mac format is <16 bits>.<8 bits>.<8 bits> + // so it fits nicely into the windows version with some massaging + // The mapping is: + // 1) upper 16 bits of MAC version go to lower 16 bits of product HI + // 2) Next most significant 8 bits go to upper 16 bits of product LO + // 3) Least significant 8 bits go to lower 16 bits of product LO + raw_module.version_info.file_version_hi = version >> 16; + raw_module.version_info.file_version_lo = ((version & 0xff00) << 8) | (version & 0xff); + } + + // TODO: write CV record } - fn read_string(&self, addr: u64) -> Result { + /// Reads a null terminated string starting at the specified address from + /// the crashing tasks' memory. + /// + /// This string is capped at 8k which should never be close to being hit as + /// it is only used for file paths for loaded modules, but then again, this + /// is MacOS, so who knows what insanity goes on. + fn read_string(&self, addr: u64) -> Result, WriterError> { // The problem is we don't know how much to read until we know how long // the string is. And we don't know how long the string is, until we've read // the memory! So, we'll try to read kMaxStringLength bytes // (or as many bytes as we can until we reach the end of the vm region). - let size_to_end = { - let mut region_base = addr; - let mut region_size = 0; - let mut nesting_level = 0; - vm_region_submap_info_64 submap_info; - mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64; - - // Get information about the vm region containing |address| - vm_region_recurse_info_t region_info; - region_info = reinterpret_cast(&submap_info); - - kern_return_t result = - mach_vm_region_recurse(target_task, - ®ion_base, - ®ion_size, - &nesting_level, - region_info, - &info_count); - - if (result == KERN_SUCCESS) { - // Get distance from |address| to the end of this region - *size_to_end = region_base + region_size -(mach_vm_address_t)address; - - // If we want to handle strings as long as 4096 characters we may need - // to check if there's a vm region immediately following the first one. - // If so, we need to extend |*size_to_end| to go all the way to the end - // of the second region. - if (*size_to_end < 4096) { - // Second region starts where the first one ends - mach_vm_address_t region_base2 = - (mach_vm_address_t)(region_base + region_size); - mach_vm_size_t region_size2; - - // Get information about the following vm region - result = - mach_vm_region_recurse(target_task, - ®ion_base2, - ®ion_size2, - &nesting_level, - region_info, - &info_count); - - // Extend region_size to go all the way to the end of the 2nd region - if (result == KERN_SUCCESS - && region_base2 == region_base + region_size) { - region_size += region_size2; - } - } - - *size_to_end = region_base + region_size -(mach_vm_address_t)address; - } else { - region_size = 0; - *size_to_end = 0; - } - - return region_size; + let get_region_size = || { + let region = self.get_vm_region(addr)?; + + let mut size_to_end = region.range.end - addr; + + // If the remaining is less than 4k, check if the next region is + // contiguous, and extend the memory that could contain the string + // to include it + if size_to_end < 4 * 1024 { + let maybe_adjacent = self.get_vm_region(region.range.end)?; + + if maybe_adjacent.range.start == region.range.end { + size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; + } + } + + Ok(size_to_end) }; - mach_vm_size_t size_to_end; - GetMemoryRegionSize(target_task, address, &size_to_end); - - if (size_to_end > 0) { - mach_vm_size_t size_to_read = - size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end; - - vector bytes; - if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) != - KERN_SUCCESS) - return string(); - - //============================================================================== - // Returns the size of the memory region containing |address| and the - // number of bytes from |address| to the end of the region. - // We potentially, will extend the size of the original - // region by the size of the following region if it's contiguous with the - // first in order to handle cases when we're reading strings and they - // straddle two vm regions. - // - static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task, - const uint64_t address, - mach_vm_size_t* size_to_end) { - - } + + if let Ok(size_to_end) = get_region_size() { + let mut bytes = self.read_task_memory(addr, size_to_end)?; + + // Find the null terminator and truncate our string + if let Some(null_pos) = bytes.iter().position(|c| c == 0) { + bytes.resize(null_pos, 0); + } + + String::from_utf8(bytes).map(Some)? + } else { + Ok(None) } } } From cbf84f34ad6decc7204eee630e9b408f14ce1c83 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 14 Apr 2022 10:34:05 +0200 Subject: [PATCH 04/53] Add remaining streams --- src/mac/streams.rs | 2 + src/mac/streams/breakpad_info.rs | 24 +++ src/mac/streams/misc_info.rs | 268 +++++++++++++++++++++++++++++++ 3 files changed, 294 insertions(+) create mode 100644 src/mac/streams/breakpad_info.rs create mode 100644 src/mac/streams/misc_info.rs diff --git a/src/mac/streams.rs b/src/mac/streams.rs index 4d149fd6..a9a3a191 100644 --- a/src/mac/streams.rs +++ b/src/mac/streams.rs @@ -1,4 +1,6 @@ +mod breakpad_info; mod memory_list; +mod misc_info; mod module_list; mod system_info; mod thread_list; diff --git a/src/mac/streams/breakpad_info.rs b/src/mac/streams/breakpad_info.rs new file mode 100644 index 00000000..d0981268 --- /dev/null +++ b/src/mac/streams/breakpad_info.rs @@ -0,0 +1,24 @@ +use super::*; +use format::{BreakpadInfoValid, MINIDUMP_BREAKPAD_INFO as BreakpadInfo}; + +impl MiniDumpWriter { + fn write_breakpad_info(&mut self, buffer: &mut DumpBuf) -> Result { + let mut bp_section = MemoryWriter::::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::BreakpadInfoStream as u32, + location: info_section.location(), + }; + + let bp_info = BreakpadInfo { + validity: BreakpadInfoValid::DumpThreadId.bits() + | BreakpadInfoValid::RequestingThreadId.bits(), + // The thread where the exception port handled the exception, might + // be useful to ignore/deprioritize when processing the minidump + dump_thread_id: self.crash_context.handler_thread, + // The actual thread where the exception was thrown + requesting_thread_id: self.crash_context.thread, + }; + + Ok(dirent) + } +} diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs new file mode 100644 index 00000000..7c76101e --- /dev/null +++ b/src/mac/streams/misc_info.rs @@ -0,0 +1,268 @@ +use super::*; +use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; +use std::ffi::c_void; + +#[repr(C)] +struct TimeValue { + seconds: i32, + microseconds: i32, +} + +impl From for std::time::Duration { + fn from(tv: TimeValue) -> Self { + let mut seconds = tv.seconds as u64; + let mut microseconds = tv.microseconds as u32; + // This _probably_ will never happen, but this will avoid a panic in + // Duration::new() if it does + if tv.microseconds >= 1000000 { + seconds += 1; + microseconds -= 1000000; + } + + std::time::Duration::new(seconds, microseconds * 1000) + } +} + +#[repr(C)] +struct MachTaskBasicInfo { + virtual_size: usize, // virtual memory size in bytes + resident_size: usize, // resident memory size in bytes + resident_size_max: usize, // maximum resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads + suspend_count: i32, // suspend count for task +} + +#[repr(C)] +struct TaskThreadsTimeInfo { + user_time: TimeValue, // total user run time for live threads + system_time: TimeValue, // total system run time for live threads +} + +extern "C" { + /// /usr/include/mach/mach_traps.h + /// + /// This seems to be marked as "obsolete" so might disappear at some point? + fn pid_for_task( + task: mach2::port::mach_port_name_t, + pid: *mut i32, + ) -> mach2::kern_return::kern_return_t; +} + +#[repr(C)] +struct VmSpace { + dummy: i32, + dummy2: *const u8, + dummy3: [i32; 5], + dummy4: [*const u8; 3], +} + +#[repr(C)] +struct ExternProc { + starttime: libc::timeval, // process start time, actually a union, but that's an implementation detail + vmspace: *const VmSpace, // Address space + sigacts: *const u8, // Signal actions, state (PROC ONLY) + flag: i32, // P_* flags + stat: i8, // S* process status + pid: libc::pid_t, // pid + oppid: libc::pid_t, // save parent pid during ptrace + dupfd: i32, // sideways return value from fdopen + /* Mach related */ + user_stack: *const u8, // where user stack was allocated, + exit_thread: *const c_void, // Which thread is exiting? + debugger: i32, // allow to debug + sigwait: i32, // indication to suspend + /* scheduling */ + estcpu: u32, // time averaged value of cpticks + cpticks: i32, // tick of cpu time + pctcpu: u32, // %cpu for this process during swtime + wchan: *const c_void, // sleep address + wmesg: *const i8, // reason for sleep + swtime: u32, // time swapped in or out + slptime: u32, // time since last blocked + realtimer: libc::itimerval, // alarm timer + rtime: libc::timeval, // real time + uticks: u64, // statclock hits in user mode + sticks: u64, // statclock hits in system mode + iticks: u64, // statclock hits processing intr + traceflag: i32, // kernel trace points + tracep: *const c_void, // trace to vnode + siglist: i32, // DEPRECATED + textvp: *const c_void, // vnode of executable + holdcnt: i32, // if non-zero, don't swap + sigmask: libc::sigset_t, // DEPRECATED + sigignore: libc::sigset_t, // signals being ignored + sigcatch: libc::sigset_t, // signals being caught by user + priority: u8, // process priority + usrpri: u8, // user-priority based on cpu and nice + nice: i8, // process "nice" value + comm: [i8; 16 /*MAXCOMLEN*/ + 1], + pgrp: *const c_void, // pointer to process group + addr: *const c_void, // kernel virtual addr of u-area (PROC ONLY) + xstat: u16, // exit status for wait; also stop signal + acflag: u16, // accounting flags + ru: *const c_void, // exit information +} + +#[repr(C)] +struct Pcred { + pc_lock: [i8; 72], // opaque content + pc_ucred: *const c_void, // current credentials + ruid: libc::uid_t, // real user id + svuid: libc::uid_t, // saved effective user id + rgid: libc::gid_t, // real group id + svgid: libc::gid_t, // saved effective group id + refcnt: i32, // number of references +} + +#[repr(C)] +struct Ucred { + refcnt: i32, // reference count + uid: libc::uid_t, // effective user id + ngroups: i16, // number of groups + groups: [libc::gid_t; 16], +} + +#[repr(C)] +struct EProc { + paddr: *const c_void, // address of proc + sess: *const c_void, // session pointer + pcred: Pcred, // process credentials + ucred: Ucred, // current credentials + vm: VmSpace, // address space + ppid: libc::pid_t, // parent process id + pgid: libc::gid_t, // process group id + jobc: i16, // job control counter + tdev: i32, // controlling tty dev + tpgid: libc::gid_t, // tty process group id + tsess: *const c_void, // tty session pointer + wmesg: [i8; 8], // wchan message + xsize: i32, // text size + xrssize: i16, // text rss + xccount: i16, // text references + xswrss: i16, + flag: i32, + login: [i8; 12], // short setlogin() name + spare: [i32; 4], +} + +#[repr(C)] +struct KInfoProc { + kp_proc: ExternProc, + kp_eproc: EProc, +} + +impl MiniDumpWriter { + fn write_misc_info(&mut self, buffer: &mut DumpBuf) -> Result { + let mut info_section = MemoryWriter::::alloc(buffer)?; + let dirent = MDRawDirectory { + stream_type: MDStreamType::MiscInfoStream as u32, + location: info_section.location(), + }; + + let mut misc_info = MDRawMiscInfo { + size_of_info: std::mem::size_of::() as u32, + flags1: MiscInfoFlags::MINIDUMP_MISC1_PROCESS_ID.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESS_TIMES.bits() + | MiscInfoFlags::MINIDUMP_MISC1_PROCESSOR_POWER_INFO.bits(), + ..Default::default() + }; + + // Note that Breakpad is using `getrusage` to get process times, but that + // can only get resource usage for the current process and/or children, + // but since we're (most likely) running in a different process than the + // one that has crashed, we instead use the same method that Crashpad + // uses to get the information for the actual crashed process which is + // far more interesting and relevant + // + // SAFETY: syscalls + unsafe { + let mut pid = 0; + kern_ret(|| pid_for_task(self.crash_context.task, &mut pid))?; + + let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_PID, pid]; + let mut kinfo_proc = std::mem::MaybeUninit::::zeroed(); + let mut len = std::mem::size_of::(); + + if libc::sysctl( + mib.as_mut_ptr().cast(), + std::mem::size_of_val(&mib) as u32, + kinfo_proc.as_mut_ptr().cast(), + &mut len, + ) != 0 + { + return Err(std::io::Error::last_os_error().into()); + } + + let kinfo_proc = kinfo_proc.assume_init(); + + // This sysctl does not return an error if the pid was not found. 10.9.5 + // xnu-2422.115.4/bsd/kern/kern_sysctl.c sysctl_prochandle() calls + // xnu-2422.115.4/bsd/kern/kern_proc.c proc_iterate(), which provides no + // indication of whether anything was done. To catch this, check that the PID + // has changed from the 0 + if kinfo_proc.kp_proc.p_pid == 0 { + return Err(); + } + + misc_info.process_create_time = kinfo_proc.kp_proc.starttime.tv_sec as u32; + + // The basic task info keeps the timings for all of the terminated threads + let mut basic_info = std::mem::MaybeUninit::::uninit(); + let mut count = std::mem::size_of::() + / std::mem::size_of::(); + + kern_ret(|| { + mach2::task::task_info( + task, + mach2::task_info::MACH_TASK_BASIC_INFO, + basic_info.as_mut_ptr().cast(), + &mut count, + ) + }) + .ok()?; + + // THe thread times info keeps the timings for all of the living threads + let mut thread_times_info = std::mem::MaybeUninit::::uninit(); + let mut count = std::mem::size_of::() + / std::mem::size_of::(); + + kern_ret(|| { + mach2::task::task_info( + task, + mach2::task_info::TASK_THREAD_TIMES_INFO, + thread_times_info.as_mut_ptr().cast(), + &mut count, + ) + }) + .ok()?; + + let basic_info = basic_info.assume_init(); + let thread_times_info = thread_times_info.assume_init(); + + let user_time: std::time::Duration = + basic_info.user_time.into() + thread_times_info.user_time.into(); + let system_time: std::time::Duration = + basic_info.system_time.into() + thread_times_info.system_time.into(); + + misc_info.process_user_time = user_time.as_secs() as u32; + misc_info.process_kernel_time = system_time.as_secs() as u32; + } + + // Note that neither of these two keys are present on aarch64, at least atm + let max: u64 = sysctl_by_name(b"hw.cpufrequency_max\0"); + let freq: u64 = sysctl_by_name(b"hw.cpufrequency\0"); + + let max = (max / 1000 * 1000) as u32; + let current = (freq / 1000 * 1000) as u32; + + misc_info.processor_max_mhz = max; + misc_info.processor_mhz_limit = max; + misc_info.processor_current_mhz = current; + + info_section.set_value(misc_info); + + Ok(dirent) + } +} From b3c8c10e70a6118df5b7c683e80b8762a663c5a8 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 14 Apr 2022 10:35:49 +0200 Subject: [PATCH 05/53] Begin refactor This starts a refactor to pull out all of the system calls to a separate TaskDumper similar to PTraceDumper to more easily test and maintain the code --- Cargo.toml | 4 + src/mac.rs | 5 + src/mac/errors.rs | 23 +- src/mac/mach_helpers.rs | 454 +++++++++++++++++++++++++++++++++ src/mac/streams.rs | 5 +- src/mac/streams/memory_list.rs | 25 +- src/mac/streams/module_list.rs | 402 +++++++++-------------------- src/mac/streams/thread_list.rs | 166 +----------- src/mac/task_dumper.rs | 304 ++++++++++++++++++++++ 9 files changed, 907 insertions(+), 481 deletions(-) create mode 100644 src/mac/mach_helpers.rs create mode 100644 src/mac/task_dumper.rs diff --git a/Cargo.toml b/Cargo.toml index 272835c2..154d215e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,3 +47,7 @@ mach2 = "0.4" [dev-dependencies] minidump = "0.10" + +[patch.crates-io] +minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "master" } +crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "macos" } diff --git a/src/mac.rs b/src/mac.rs index aaa7f530..826b8c17 100644 --- a/src/mac.rs +++ b/src/mac.rs @@ -1,3 +1,8 @@ +#[cfg(target_pointer_width = "32")] +compile_error!("Various MacOS FFI bindings assume we are on a 64-bit architechture"); + pub mod errors; +mod mach_helpers; pub mod minidump_writer; mod streams; +mod task_dumper; diff --git a/src/mac/errors.rs b/src/mac/errors.rs index 00f99008..77830aaa 100644 --- a/src/mac/errors.rs +++ b/src/mac/errors.rs @@ -1,22 +1,11 @@ use thiserror::Error; -use mach2::kern_return::kern_return_t; - #[derive(Debug, Error)] pub enum WriterError { - #[error("kernel error ({})", _0)] - Kernel(kern_return_t), - #[error("detected an invalid mach image header")] - InvalidMachHeader, -} - -#[inline] -pub(crate) fn kern_ret(func: impl FnOnce() -> kern_return_t) -> Result<(), WriterError> { - let res = func(); - - if res == KERN_SUCCESS { - Ok(()) - } else { - Err(WriterError::Kerne(res)) - } + #[error("unable to find a UUID for a module")] + UnknownUuid, + #[error("unable to find the main executable image for the process")] + NoExecutableImage, + #[error(transparent)] + TaskDumpError(#[from] crate::mac::task_dumper::TaskDumpError), } diff --git a/src/mac/mach_helpers.rs b/src/mac/mach_helpers.rs new file mode 100644 index 00000000..70eef097 --- /dev/null +++ b/src/mac/mach_helpers.rs @@ -0,0 +1,454 @@ +//! Contains various helpers to improve and expand on the bindings provided +//! by `mach2` + +// Just exports all of the mach functions we use into a flat list +pub use mach2::{ + kern_return::KERN_SUCCESS, + task, task_info, + thread_act::thread_get_state, + traps::mach_task_self, + vm::{mach_vm_deallocate, mach_vm_read}, + vm_region::vm_region_submap_info_64, +}; + +/// A Mach kernel error. +/// +/// See . +#[derive(thiserror::Error, Debug)] +pub enum KernelError { + #[error("specified address is not currently valid")] + InvalidAddress = 1, + #[error("specified memory is valid, but does not permit the required forms of access")] + ProtectionFailure = 2, + #[error("the address range specified is already in use, or no address range of the size specified could be found")] + NoSpace = 3, + #[error("the function requested was not applicable to this type of argument, or an argument is invalid")] + InvalidArgument = 4, + #[error("the function could not be performed")] + Failure = 5, + #[error("system resource could not be allocated to fulfill this request")] + ResourceShortage = 6, + #[error("the task in question does not hold receive rights for the port argument")] + NotReceiver = 7, + #[error("bogus access restriction")] + NoAccess = 8, + #[error( + "during a page fault, the target address refers to a memory object that has been destroyed" + )] + MemoryFailure = 9, + #[error( + "during a page fault, the memory object indicated that the data could not be returned" + )] + MemoryError = 10, + #[error("the receive right is already a member of the portset")] + AlreadyInSet = 11, + #[error("the receive right is not a member of a port set")] + NotInSet = 12, + #[error("the name already denotes a right in the task")] + NameExists = 13, + #[error("the operation was aborted")] + Aborted = 14, + #[error("the name doesn't denote a right in the task")] + InvalidName = 15, + #[error("target task isn't an active task")] + InvalidTask = 16, + #[error("the name denotes a right, but not an appropriate right")] + InvalidRight = 17, + #[error("a blatant range error")] + InvalidValue = 18, + #[error("operation would overflow limit on user-references")] + UserRefsOverflow = 19, + #[error("the supplied port capability is improper")] + InvalidCapability = 20, + #[error("the task already has send or receive rights for the port under another name")] + RightExists = 21, + #[error("target host isn't actually a host")] + InvalidHost = 22, + #[error("an attempt was made to supply 'precious' data for memory that is already present in a memory object")] + MemoryPresent = 23, + // These 2 are errors which should only ever be seen by the kernel itself + //MemoryDataMoved = 24, + //MemoryRestartCopy = 25, + #[error("an argument applied to assert processor set privilege was not a processor set control port")] + InvalidProcessorSet = 26, + #[error("the specified scheduling attributes exceed the thread's limits")] + PolicyLimit = 27, + #[error("the specified scheduling policy is not currently enabled for the processor set")] + InvalidPolicy = 28, + #[error("the external memory manager failed to initialize the memory object")] + InvalidObject = 29, + #[error( + "a thread is attempting to wait for an event for which there is already a waiting thread" + )] + AlreadyWaiting = 30, + #[error("an attempt was made to destroy the default processor set")] + DefaultSet = 31, + #[error("an attempt was made to fetch an exception port that is protected, or to abort a thread while processing a protected exception")] + ExceptionProtected = 32, + #[error("a ledger was required but not supplied")] + InvalidLedger = 33, + #[error("the port was not a memory cache control port")] + InvalidMemoryControl = 34, + #[error("an argument supplied to assert security privilege was not a host security port")] + InvalidSecurity = 35, + #[error("thread_depress_abort was called on a thread which was not currently depressed")] + NotDepressed = 36, + #[error("object has been terminated and is no longer available")] + Terminated = 37, + #[error("lock set has been destroyed and is no longer available")] + LockSetDestroyed = 38, + #[error("the thread holding the lock terminated before releasing the lock")] + LockUnstable = 39, + #[error("the lock is already owned by another thread")] + LockOwned = 40, + #[error("the lock is already owned by the calling thread")] + LockOwnedSelf = 41, + #[error("semaphore has been destroyed and is no longer available")] + SemaphoreDestroyed = 42, + #[error("return from RPC indicating the target server was terminated before it successfully replied")] + RpcServerTerminated = 43, + #[error("terminate an orphaned activation")] + RpcTerminateOrphan = 44, + #[error("allow an orphaned activation to continue executing")] + RpcContinueOrphan = 45, + #[error("empty thread activation (No thread linked to it)")] + NotSupported = 46, + #[error("remote node down or inaccessible")] + NodeDown = 47, + #[error("a signalled thread was not actually waiting")] + NotWaiting = 48, + #[error("some thread-oriented operation (semaphore_wait) timed out")] + OperationTimedOut = 49, + #[error("during a page fault, indicates that the page was rejected as a result of a signature check")] + CodesignError = 50, + #[error("the requested property cannot be changed at this time")] + PoicyStatic = 51, + #[error("the provided buffer is of insufficient size for the requested data")] + InsufficientBufferSize = 52, + #[error("denied by security policy")] + Denied = 53, + #[error("the KC on which the function is operating is missing")] + MissingKC = 54, + #[error("the KC on which the function is operating is invalid")] + InvalidKC = 55, + #[error("a search or query operation did not return a result")] + NotFound = 56, +} + +impl From for KernelError { + fn from(kr: mach2::kern_return::kern_return_t) -> Self { + use mach2::kern_return::*; + + match kr { + KERN_INVALID_ADDRESS => Self::InvalidAddress, + KERN_PROTECTED_FAILURE => Self::ProtectionFailure, + KERN_NO_SPACE => Self::NoSpace, + KERN_INVALID_ARGUMENT => Self::InvalidArgument, + KERN_FAILURE => Self::Failure, + KERN_RESOURCE_SHORTAGE => Self::ResourceShortage, + KERN_NOT_RECEIVER => Self::NotReceiver, + KERN_NO_ACCESS => Self::NoAccess, + KERN_MEMORY_FAILURE => Self::MemoryFailure, + KERN_MEMORY_ERROR => Self::MemoryError, + KERN_ALREADY_IN_SET => Self::AlreadyInSet, + KERN_NAME_EXISTS => Self::NameExists, + KERN_INVALID_NAME => Self::InvalidName, + KERN_INVALID_TASK => Self::InvalidTask, + KERN_INVALID_RIGHT => Self::InvalidRight, + KERN_INVALID_VALUE => Self::InvalidValue, + KERN_UREFS_OVERFLOW => Self::UserRefsOverflow, + KERN_INVALID_CAPABILITY => Self::InvalidCapability, + KERN_RIGHT_EXISTS => Self::RightExists, + KERN_INVALID_HOST => Self::InvalidHost, + KERN_MEMORY_PRESENT => Self::MemoryPresent, + KERN_INVALID_PROCESSOR_SET => Self::InvalidProcessorSet, + KERN_POLICY_LIMIT => Self::PolicyLimit, + KERN_INVALID_POLICY => Self::InvalidPolicy, + KERN_INVALID_OBJECT => Self::InvalidObject, + KERN_ALREADY_WAITING => Self::AlreadyWaiting, + KERN_DEFAULT_SET => Self::DefaultSet, + KERN_EXCEPTION_PROTECTED => Self::ExceptionProtected, + KERN_INVALID_LEDGER => Self::InvalidLedger, + KERN_INVALID_MEMORY_CONTROL => Self::InvalidMemoryControl, + KERN_INVALID_SECURITY => Self::InvalidSecurity, + KERN_NOT_DEPRESSED => Self::NotDepressed, + KERN_TERMINATED => Self::Terminated, + KERN_LOCK_SET_DESTROYED => Self::LockSetDestroyed, + KERN_LOCK_UNSTABLE => Self::LockUnstable, + KERN_LOCK_OWNED => Self::LockOwned, + KERN_LOCK_OWNED_SELF => Self::LockOwnedSelf, + KERN_SEMAPHORE_DESTROYED => Self::SemaphoreDestroyed, + KERN_RPC_SERVER_TERMINATED => Self::RpcServerTerminated, + KERN_RPC_TERMINATE_ORPHAN => Self::RpcTerminateOrphan, + KERN_RPC_CONTINUE_ORPHAN => Self::RpcContinueOrphan, + KERN_NOT_SUPPORTED => Self::NotSupported, + KERN_NODE_DOWN => Self::NodeDown, + KERN_NOT_WAITING => Self::NotWaiting, + KERN_OPERATION_TIMED_OUT => Self::OperationTimedOut, + KERN_CODESIGN_ERROR => Self::CodesignError, + KERN_POLICY_STATIC => Self::PoicyStatic, + KERN_INSUFFICIENT_BUFFER_SIZE => Self::InsufficientBufferSize, + KERN_DENIED => Self::Denied, + 54 => Self::MissingKC, + 55 => Self::InvalidKC, + 56 => Self::NotFound, + // This should never happen given a result from a mach call, but + // in that case we just use `Failure` as the mach header itself + // describes it as a catch all + _ => Self::Failure, + } + } +} + +// From /usr/include/mach/machine/thread_state.h +pub const THREAD_STATE_MAX: usize = 1296; + +cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 4; + + type ArchTreadState = mach2::structs::x86_thread_state64_t; + } else if #[cfg(target_arch = "aarch64")] { + /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h + pub const THREAD_STATE_FLAVOR: u32 = 6; + + // Missing from mach2 atm + // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h + #[repr(C)] + struct Arm64ThreadState { + x: [u64; 29], + fp: u64, + lr: u64, + sp: u64, + pc: u64, + cpsr: u32, + __pad: u32, + } + + type ArchTreadState = Arm64ThreadState; + } else { + compile_error!("unsupported target arch"); + } +} + +pub struct ThreadState { + pub state: [u32; THREAD_STATE_MAX], + pub state_size: u32, +} + +impl Default for ThreadState { + fn default() -> Self { + Self { + state: [0u32; THREAD_STATE_MAX], + state_size: THREAD_STATE_MAX * std::mem::size_of::() as u32, + } + } +} + +impl ThreadState { + /// Gets the program counter + pub fn pc(&self) -> u64 { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let inner = self.as_ref(); + inner.__pc + } else if #[cfg(target_arch = "aarch64")] { + let inner = self.as_ref(); + inner.pc + } + } + } + + /// Gets the stack pointer + pub fn sp(&self) -> u64 { + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let inner = self.as_ref(); + inner.__sp + } else if #[cfg(target_arch = "aarch64")] { + let inner = self.as_ref(); + inner.sp + } + } + } +} + +impl AsRef for ThreadState { + fn as_ref(&self) -> &ArchThreadState { + &*(self.state.as_ptr().cast()) + } +} + +/// Minimal trait that just pairs a structure that can be filled out by +/// [`mach2::task::task_info`] with the "flavor" that tells it the info we +/// actually want to retrieve +pub trait TaskInfo { + /// One of the `MACH_*_TASK` integers. I assume it's very bad if you implement + /// this trait and provide the wrong flavor for the struct + const FLAVOR: u32; +} + +// usr/include/mach-o/loader.h, the file type for the main executable image +const MH_EXECUTE: u32 = 0x2; +// usr/include/mach-o/loader.h, magic number for MachHeader +const MH_MAGIC_64: u32 = 0xfeedfacf; +// usr/include/mach-o/loader.h, command to map a segment +const LC_SEGMENT_64: u32 = 0x19; +// usr/include/mach-o/loader.h, dynamically linked shared lib ident +const LC_ID_DYLIB: u32 = 0xd; +// usr/include/mach-o/loader.h, the uuid +const LC_UUID: u32 = 0x1b; + +// usr/include/mach-o/loader.h +#[repr(C)] +pub struct MachHeader { + pub magic: u32, // mach magic number identifier + pub cpu_type: i32, // cpu_type_t cpu specifier + pub cpu_sub_type: i32, // cpu_subtype_t machine specifier + pub file_type: u32, // type of file + pub num_commands: u32, // number of load commands + pub size_commands: u32, // size of all the load commands + pub flags: u32, + __reserved: u32, +} + +// usr/include/mach-o/loader.h +#[repr(C)] +pub struct LoadCommandBase { + pub cmd: u32, // type of load command + pub cmd_size: u32, // total size of the command in bytes +} + +/* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. + */ +#[repr(C)] +pub struct SegmentCommand64 { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes + pub segment_name: [u8; 16], // string name of the section + pub vm_addr: u64, // memory address the segment is mapped to + pub vm_size: u64, // total size of the segment + pub file_off: u64, // file offset of the segment + pub file_size: u64, // amount mapped from the file + pub max_prot: i32, // maximum VM protection + pub init_prot: i32, // initial VM protection + pub num_sections: u32, // number of sections in the segment + pub flags: u32, +} + +/* + * Dynamically linked shared libraries are identified by two things. The + * pathname (the name of the library as found for execution), and the + * compatibility version number. The pathname must match and the compatibility + * number in the user of the library must be greater than or equal to the + * library being used. The time stamp is used to record the time a library was + * built and copied into user so it can be use to determined if the library used + * at runtime is exactly the same as used to built the program. + */ +#[repr(C)] +pub struct Dylib { + pub name: u32, // offset from the load command start to the pathname + pub timestamp: u32, // library's build time stamp + pub current_version: u32, // library's current version number + pub compatibility_version: u32, // library's compatibility vers number +} + +/* + * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) + * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. + * An object that uses a dynamically linked shared library also contains a + * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or + * LC_REEXPORT_DYLIB) for each library it uses. + */ +#[repr(C)] +pub struct DylibCommand { + cmd: u32, // type of load command + cmd_size: u32, // total size of the command in bytes, including pathname string + pub dylib: Dylib, // library identification +} + +/// The uuid load command contains a single 128-bit unique random number that +/// identifies an object produced by the static link editor. +#[repr(C)] +pub struct UuidCommand { + cmd: u32, + cmd_size: u32, + pub uuid: [u8; 16], +} + +/// A block of load commands for a particular image +pub struct LoadCommands { + /// The block of memory containing all of the load commands + pub buf: Vec, + /// The number of actual load commmands that _should_ be in the buffer + pub count: u32, +} + +impl LoadCommands { + fn iter(&self) -> LoadCommandsIter<'_> { + LoadCommandsIter { + buf: &self.buf, + count: self.count, + } + } +} + +pub enum LoadCommand<'buf> { + Segment(&'buf SegmentCommand64), + Dylib(&'buf DylibCommand), + Uuid(&'buf UuidCommand), +} + +pub struct LoadCommandsIter<'buf> { + buffer: &'buf [u8], + count: u32, +} + +impl<'buf> Iterator for LoadCommandsIter<'buf> { + type Item = LoadCommand<'buf>; + + fn next(&mut self) -> Option { + // SAFETY: we're interpreting raw bytes as C structs, we try and be safe + unsafe { + loop { + if self.count == 0 || self.buffer.len() < std::mem::size_of::() { + return None; + } + + let header = &*(self.buffer.as_ptr().cast::()); + + // This would mean we've been lied to by the MachHeader and either + // the size_commands field was too small, or the num_command was + // too large + if header.cmd_size as usize > self.buffer.len() { + return None; + } + + let cmd = match header.cmd { + LC_SEGMENT_64 => Some(&*(self.buffer.as_ptr().cast::())), + LC_ID_DYLIB => Some(&*(self.buffer.as_ptr().cast::())), + LC_UUID => Some(&*(self.buffer.as_ptr().cast::())), + // Just ignore any other load commands + _ => None, + }; + + self.count -= 1; + self.buffer = &self.buffer[header.cmd_size as usize..]; + + if let Some(cmd) = cmd { + return Some(cmd); + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let sz = self.count as usize; + (sz, Some(sz)) + } +} diff --git a/src/mac/streams.rs b/src/mac/streams.rs index a9a3a191..6a87f66b 100644 --- a/src/mac/streams.rs +++ b/src/mac/streams.rs @@ -5,5 +5,8 @@ mod module_list; mod system_info; mod thread_list; -use super::minidump_writer::{DumpBuf, MinidumpWriter}; +use super::{ + minidump_writer::{DumpBuf, MinidumpWriter}, + task_dumper::TaskDumper, +}; use crate::mac::errors::ker_ret; diff --git a/src/mac/streams/memory_list.rs b/src/mac/streams/memory_list.rs index f782cd93..78db79e6 100644 --- a/src/mac/streams/memory_list.rs +++ b/src/mac/streams/memory_list.rs @@ -1,29 +1,18 @@ use super::*; impl MiniDumpWriter { - fn write_memory_list(&mut self, buffer: &mut DumpBuf) -> Result { + fn write_memory_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { // Include some memory around the instruction pointer if the crash was // due to an exception const IP_MEM_SIZE: usize = 256; if self.crash_context.exc_info.is_some() { - let mut thread_state = thread_list_stream::ThreadState::default(); - // SAFETY: syscall - if unsafe { - mach2::thread_act::thread_get_state( - tid, - THREAD_STATE_FLAVOR, - thread_state.state.as_mut_ptr(), - &mut thread_state.state_size, - ) - } == mach2::kern_return::KERN_SUCCESS - { - } else { - None - } - let get_ip_block = |task, tid| -> Option { - let thread_state = Self::get_thread_state(tid).ok()?; + let thread_state = dumper.get_thread_state(tid).ok()?; let ip = thread_state.pc(); @@ -46,7 +35,7 @@ impl MiniDumpWriter { if let Some(ip_range) = get_ip_block() { let size = ip_range.end - ip_range.start; - let stack_buffer = self.read_task_memory(ip_range.start as _, size)?; + let stack_buffer = dumper.read_task_memory(ip_range.start as _, size)?; let ip_location = MDLocationDescriptor { data_size: size as u32, rva: buffer.position() as u32, diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index ce61915d..33987057 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -1,53 +1,15 @@ use super::*; -#[cfg(target_pointer_width = "32")] -compile_error!("this module assumes a 64-bit pointer width"); - -fn all_image_addr(task: mach2::mach_types::task_name_t) -> Option { - let mut task_dyld_info = std::mem::MaybeUninit::::uninit(); - let mut count = std::mem::size_of::() - / std::mem::size_of::(); - - // SAFETY: syscall - kern_ret(|| unsafe { - mach2::task::task_info( - task, - mach2::task_info::TASK_DYLD_INFO, - task_dyld_info.as_mut_ptr().cast(), - &mut count, - ) - }) - .ok()?; - - Some(task_dyld_info.all_image_info_addr) -} - -// dyld_image_info -#[repr(C)] -struct ImageInfo { - load_address: u64, - file_path: u64, - file_mod_date: u64, -} - -// usr/include/mach-o/loader.h, the file type for the main executable image -const MH_EXECUTE: u32 = 0x2; -// usr/include/mach-o/loader.h, magic number for MachHeader -const MH_MAGIC_64: u32 = 0xfeedfacf; -// usr/include/mach-o/loader.h, command to map a segment -const LC_SEGMENT_64: u32 = 0x19; -// usr/include/mach-o/loader.h, dynamically linked shared lib ident -const LC_ID_DYLIB: u32 = 0xd; -// usr/include/mach-o/loader.h, the uuid -const LC_UUID: u32 = 0x1b; - impl MiniDumpWriter { - fn write_module_list(&mut self, buffer: &mut DumpBuf) -> Result { - let modules = if let Some(all_images) = all_image_addr(self.crash_context.task) { - self.read_loaded_modules(all_images)? - } else { - vec![] - }; + fn write_module_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + // The list of modules is pretty critical information, but there could + // still be useful information in the minidump without them if we can't + // retrieve them for some reason + let modules = self.read_loaded_modules(dumper).unwrap_or_default(); let list_header = MemoryWriter::::alloc_with_val(buffer, modules.len() as u32)?; @@ -64,223 +26,99 @@ impl MiniDumpWriter { Ok(dirent) } - fn read_loaded_modules(&self, all_images_addr: u64) -> Result { - // Read the structure inside of dyld that contains information about - // loaded images. We're reading from the desired task's address space. - - // dyld_all_image_infos defined in usr/include/mach-o/dyld_images.h, we - // only need a couple of fields at the beginning - #[repr(C)] - struct AllImagesInfo { - version: u32, // == 1 in Mac OS X 10.4 - info_array_count: u32, - info_array_addr: u64, - } - - // Here we make the assumption that dyld loaded at the same address in - // the crashed process vs. this one. This is an assumption made in - // "dyld_debug.c" and is said to be nearly always valid. - let dyld_all_info_buf = - self.read_task_memory(all_images_addr, std::mem::size_of::())?; - let dyld_info: &AllImagesInfo = &*(dyld_all_info_buf.cast()); - - let dyld_info_buf = self.read_task_memory( - dyld_info.info_array_addr, - dyld_info.info_array_count * std::mem::size_of::(), - )?; - - let all_images = unsafe { - std::slice::from_raw_parts( - dyld_info.buf.as_ptr().cast::(), - dyld_info.info_array_count as usize, - ) - }; - - let mut images = Vec::with_capacity(all_images.len()); - - for image in all_images { - // Apparently MacOS will happily list the same image multiple times - // for some reason, so only add images once - let insert_index = if let Err(i) = - images.binary_search_by(|img| image.load_address.cmp(&img.load_address)) - { - i - } else { - continue; - }; - - if let Ok(module) = self.read_module(image) { - images.insert(insert_index, module); + fn read_loaded_modules( + &self, + buf: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result, WriterError> { + let mut images = dumper.read_images()?; + + // Apparently MacOS will happily list the same image multiple times + // for some reason, so sort the images by load address and remove all + // of the duplicates + images.sort(); + images.dedup(); + + let mut modules = Vec::with_capacity(images.len()); + let mut has_main_executable = false; + + for image in images { + if let Ok((module, is_main_executable)) = self.read_module(image) { + // We want to keep the modules sorted by their load address except + // in the case of the main executable image which we want to put + // first as it is most likely the culprit, or at least generally + // the most interesting module for human and machine inspectors + if is_main_executable { + modules.insert(0, module); + has_main_executable = true; + } else { + modules.push(module) + }; } } - // The modules are sorted by load address, but we always want the - // main executable to be first in the minidump - - Ok(images) - } - - fn read_module(&self, image: ImageInfo, buf: &mut DumpBuf) -> Result { - // usr/include/mach-o/loader.h - #[repr(C)] - struct MachHeader { - magic: u32, // mach magic number identifier - cpu_type: i32, // cpu_type_t cpu specifier - cpu_sub_type: i32, // cpu_subtype_t machine specifier - file_type: u32, // type of file - num_commands: u32, // number of load commands - size_commands: u32, // size of all the load commands - flags: u32, - __reserved: u32, - } - - // usr/include/mach-o/loader.h - #[repr(C)] - struct LoadCommand { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes - } - - /* - * The 64-bit segment load command indicates that a part of this file is to be - * mapped into a 64-bit task's address space. If the 64-bit segment has - * sections then section_64 structures directly follow the 64-bit segment - * command and their size is reflected in cmdsize. - */ - #[repr(C)] - struct SegmentCommand64 { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes - segment_name: [u8; 16], - vm_addr: u64, // memory address the segment is mapped to - vm_size: u64, // total size of the segment - file_off: u64, // file offset of the segment - file_size: u64, // amount mapped from the file - max_prot: i32, // maximum VM protection - init_prot: i32, // initial VM protection - num_sections: u32, // number of sections in the segment - flags: u32, - } - - /* - * Dynamicly linked shared libraries are identified by two things. The - * pathname (the name of the library as found for execution), and the - * compatibility version number. The pathname must match and the compatibility - * number in the user of the library must be greater than or equal to the - * library being used. The time stamp is used to record the time a library was - * built and copied into user so it can be use to determined if the library used - * at runtime is exactly the same as used to built the program. - */ - #[repr(C)] - struct Dylib { - name: u32, // offset from the load command start to the pathname - timestamp: u32, // library's build time stamp - current_version: u32, // library's current version number - compatibility_version: u32, // library's compatibility vers number - } - - /* - * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) - * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. - * An object that uses a dynamically linked shared library also contains a - * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or - * LC_REEXPORT_DYLIB) for each library it uses. - */ - #[repr(C)] - struct DylibCommand { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes, including pathname string - dylib: Dylib, // library identification - } - - /* - * The uuid load command contains a single 128-bit unique random number that - * identifies an object produced by the static link editor. - */ - #[repr(C)] - struct UuidCommand { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes - uuid: [u8; 16], - } - - let mach_header_buf = - self.read_task_memory(image.load_address, std::mem::size_of::())?; - - let header: &MachHeader = &*(mach_header_buf.cast()); - - //let header_size = std::mem::size_of::() + header.size_commands; - - if header.magic != MH_MAGIC_64 { - return Err(WriterError::InvalidMachHeader); + if !has_main_executable { + Err(WriterError::NoExecutableImage) + } else { + Ok(images) } + } - // Read the load commands which immediately follow the image header from - // the task memory - let load_commands_buf = self.read_task_memory( - image.load_address + std::mem::size_of::() as u64, - header.size_commands, - )?; - - // Loads commands vary in size depending on the actual type, so we have - // to manually update the pointer offset rather than just stuffing the - // buffer into a slice - let mut next_header = load_commands.buf.as_ptr(); - + fn read_module( + &self, + image: ImageInfo, + buf: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result<(MDRawModule, bool), WriterError> { struct ImageSizes { vm_addr: u64, vm_size: u64, slide: isize, } - let mut image_sizes = None; - let mut image_version = None; - let mut image_uuid = None; - - // TODO: pullout the load command parsing to its own function for testing - for i in 0..header.num_commands { - let header = &*(next_header.cast::()); - - if image_sizes.is_none() && header.cmd == LC_SEGMENT_64 { - let seg: &SegmentCommand64 = &*(next_header.cast()); - - if seg.segment_name[..7] == b"__TEXT\0" { - let slide = if seg.file_off == 0 && seg.file_size != 0 { - image.load_address - seg.vm_addr - } else { - 0 - }; - - image_sizes = Some(ImageSizes { - vm_addr: seg.vm_addr, - vm_size: seg.vm_size, - slide, - }); + let mut sizes = None; + let mut version = None; + let mut uuid = None; + + { + let load_commands = dumper.get_load_commands(&image)?; + + for lc in load_commands.iter() { + match lc { + mach::LoadCommand::Segment(seg) if sizes.is_none() => { + if seg.segment_name[..7] == b"__TEXT\0" { + let slide = if seg.file_off == 0 && seg.file_size != 0 { + image.load_address - seg.vm_addr + } else { + 0 + }; + + sizes = Some(ImageSizes { + vm_addr: seg.vm_addr, + vm_size: seg.vm_size, + slide, + }); + } + } + mach::LoadCommand::Dylib(dylib) if version.is_none() => { + version = Some(dylib.current_version); + } + mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { + uuid = Some(img_id.uuid); + } } - } - - if image_version.is_none() && header.cmd == LC_ID_DYLIB { - let seg: &DylibComand = &*(next_header.cast()); - - image_version = Some(seg.current_version); - } - - if image_uuid.is_none() && header.cmd == LC_UUID { - let seg: &UuidComand = &*(next_header.cast()); - image_uuid = Some(seg.uuid); - } - if image_sizes.is_some() && image_version.is_some() { - break; + if image_sizes.is_some() && image_version.is_some() && image_uuid.is_some() { + break; + } } - - next_header = next_header.offset(header.cmd_size as isize); } let image_sizes = image_sizes.ok_or_else(|| WriterError::InvalidMachHeader)?; + let uuid = image_uuid.ok_or_else(|| WriterError::UnknownUuid)?; let file_path = if image.file_path != 0 { - self.read_string(image.file_path)?.unwrap_or_default() + dumper.read_string(image.file_path)?.unwrap_or_default() } else { String::new() }; @@ -295,8 +133,8 @@ impl MiniDumpWriter { }; // Version info is not available for the main executable image since - // it doesn't have a LC_ID_DYLIB load command - if let Some(version) = image_version { + // it doesn't issue a LC_ID_DYLIB load command + if let Some(version) = &image_version { raw_module.version_info.signature = format::VS_FFI_SIGNATURE; raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; @@ -311,50 +149,40 @@ impl MiniDumpWriter { raw_module.version_info.file_version_lo = ((version & 0xff00) << 8) | (version & 0xff); } - // TODO: write CV record - } - - /// Reads a null terminated string starting at the specified address from - /// the crashing tasks' memory. - /// - /// This string is capped at 8k which should never be close to being hit as - /// it is only used for file paths for loaded modules, but then again, this - /// is MacOS, so who knows what insanity goes on. - fn read_string(&self, addr: u64) -> Result, WriterError> { - // The problem is we don't know how much to read until we know how long - // the string is. And we don't know how long the string is, until we've read - // the memory! So, we'll try to read kMaxStringLength bytes - // (or as many bytes as we can until we reach the end of the vm region). - let get_region_size = || { - let region = self.get_vm_region(addr)?; - - let mut size_to_end = region.range.end - addr; - - // If the remaining is less than 4k, check if the next region is - // contiguous, and extend the memory that could contain the string - // to include it - if size_to_end < 4 * 1024 { - let maybe_adjacent = self.get_vm_region(region.range.end)?; + let module_name = if let Some(sep_index) = file_path.rfind('/') { + &file_path[sep_index + 1..] + } else if file_path.is_empty() { + "" + } else { + &file_path + }; - if maybe_adjacent.range.start == region.range.end { - size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; - } - } + #[derive(scroll::Pwrite, scroll::SizeWith)] + struct CvInfoPdb { + cv_signature: u32, + signature: format::GUID, + age: u32, + } - Ok(size_to_end) - }; + let cv = MemoryWriter::alloc_with_val( + buf, + CvInfoPdb { + cv_signature: format::CvSignature::Pdb70, + age: 0, + signature: uuid.into(), + }, + )?; - if let Ok(size_to_end) = get_region_size() { - let mut bytes = self.read_task_memory(addr, size_to_end)?; + // Note that we don't use write_string_to_location here as the module + // name is a simple 8-bit string, not 16-bit like most other strings + // in the minidump, and is directly part of the record itself, not an rva + buf.write_all(module_name.as_bytes())?; + buf.write_all(&[0])?; // null terminator - // Find the null terminator and truncate our string - if let Some(null_pos) = bytes.iter().position(|c| c == 0) { - bytes.resize(null_pos, 0); - } + let mut cv_location = cv.location(); + cv_location.size += module_name.len() as u32 + 1; + raw_module.cv_record = cv_location; - String::from_utf8(bytes).map(Some)? - } else { - Ok(None) - } + Ok((raw_module, image_version.is_none())) } } diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index 6e54f834..ffd5872a 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -1,66 +1,11 @@ use super::*; -// From /usr/include/mach/machine/thread_state.h -const THREAD_STATE_MAX: usize = 1296; - -cfg_if::cfg_if! { - if #[cfg(target_arch = "x86_64")] { - /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h - const THREAD_STATE_FLAVOR: u32 = 4; - } else if #[cfg(target_arch = "aarch64")] { - /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h - const THREAD_STATE_FLAVOR: u32 = 6; - - // Missing from mach2 atm - // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h - #[repr(C)] - struct Arm64ThreadState { - x: [u64; 29], - fp: u64, - lr: u64, - sp: u64, - pc: u64, - cpsr: u32, - __pad: u32, - } - } -} - -struct ThreadState { - state: [u32; THREAD_STATE_MAX], - state_size: u32, -} - -impl Default for ThreadState { - fn default() -> Self { - Self { - state: [0u32; THREAD_STATE_MAX], - state_size: THREAD_STATE_MAX * std::mem::size_of::() as u32, - } - } -} - -impl ThreadState { - pub fn pc(&self) -> u64 { - cfg_if::cfg_if! { - if #[cfg(target_arch = "x86_64")] { - let x86_64_state: &mach2::structs::x86_thread_state64_t = &*(thread_state.state.as_ptr().cast()); - x86_64_state.__pc - } else if #[cfg(target_arch = "aarch64")] { - let aarch64_state: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); - aarch64_state.pc - } - } - } -} - -pub(crate) struct VMRegionInfo { - pub(crate) info: mach2::vm_region::vm_region_submap_info_64, - pub(crate) range: std::ops::Range, -} - impl MinidumpWriter { - fn write_thread_list(&mut self, buffer: &mut DumpBuf) -> Result { + fn write_thread_list( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { // Retrieve the list of threads from the task that crashed. // SAFETY: syscall let mut threads = std::ptr::null_mut(); @@ -108,18 +53,7 @@ impl MinidumpWriter { let thread_state = Self::get_thread_state(tid)?; - cfg_if::cfg_if! { - if #[cfg(target_arch = "x86_64")] { - let x86_64_state: &mach2::structs::x86_thread_state64_t = &*(thread_state.state.as_ptr().cast()); - - self.write_stack_from_start_address(x86_64_state.__rsp, buffer, &mut thread)?; - } else if #[cfg(target_arch = "aarch64")] { - let aarch64_state: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); - self.write_stack_from_start_address(aarch64_state.sp, buffer, &mut thread)?; - } else { - compile_error!("unsupported target arch"); - } - } + self.write_stack_from_start_address(thread_state.sp(), buffer, &mut thread)?; let mut cpu: RawContextCPU = Default::default(); Self::fill_cpu_context(thread_state, &mut cpu); @@ -128,22 +62,6 @@ impl MinidumpWriter { Ok(thread) } - fn get_thread_state(tid: u32) -> Result { - let mut thread_state = ThreadState::default(); - - // SAFETY: syscall - kern_ret(|| unsafe { - mach2::thread_act::thread_get_state( - tid, - THREAD_STATE_FLAVOR, - thread_state.state.as_mut_ptr(), - &mut thread_state.state_size, - ) - })?; - - Ok(thread_state) - } - fn write_stack_from_start_address( &mut self, start: u64, @@ -231,46 +149,12 @@ impl MinidumpWriter { stack_region_base + stack_region_size - start_addr } - fn read_task_memory(&self, address: u64, length: usize) -> Result, WriterError> { - let sys_page_size = libc::getpagesize(); - - // use the negative of the page size for the mask to find the page address - let page_address = address & (-sys_page_size); - let last_page_address = (address + length + (sys_page_size - 1)) & (-sys_page_size); - - let page_size = last_page_address - page_address; - let mut local_start = std::ptr::null_mut(); - let mut local_length = 0; - - kern_ret(|| unsafe { - mach2::vm::mach_vm_read( - self.crash_context.task, - page_address, - page_size, - &mut local_start, - &mut local_length, - ) - })?; - - let mut buffer = Vec::with_capacity(length); - - let task_buffer = - std::slice::from_raw_parts(local_start.offset(address - page_address), length); - buffer.extend_from_slice(task_buffer); - - // Don't worry about the return here, if something goes wrong there's probably - // not much we can do about, and we have what we want anyways - mach2::vm::mach_vm_deallocate(mach2::traps::mach_task_self(), local_start, local_length); - - Ok(buffer) - } - fn fill_cpu_context(thread_state: &ThreadState, out: &mut RawContextCPU) { cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { out.context_flags = format::ContextFlagsCpu::CONTEXT_AMD64.bits(); - let ts: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + let ts = thread_state.as_ref(); out.rax = ts.__rax; out.rbx = ts.__rbx; @@ -301,7 +185,7 @@ impl MinidumpWriter { // This is kind of a lie as we don't actually include the full float state..? out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; - let ts: &Arm64ThreadState = &*(thread_state.state.as_ptr().cast()); + let ts = thread_state.as_ref(); out.cpsr = ts.cpsr; out.iregs[..28].copy_from_slice(&ts.x[..28]); @@ -314,38 +198,4 @@ impl MinidumpWriter { } } } - - fn get_vm_region(&self, addr: u64) -> Result { - let mut region_base = addr; - let mut region_size = 0; - let mut nesting_level = 0; - let mut region_info = 0; - let mut submap_info = std::mem::MaybeUninit::::uninit(); - - // mach/vm_region.h - const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = - (std::mem::size_of::() - / std::mem::size_of::()) as u32; - - let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; - - kern_ret(|| - // SAFETY: syscall - unsafe { - mach2::vm::mach_vm_region_recurse( - self.crash_context.task, - &mut region_base, - &mut region_size, - &mut nesting_level, - submap_info.as_mut_ptr().cast(), - &mut info_count, - ) - })?; - - Ok(VMRegionInfo { - // SAFETY: this will be valid if the syscall succeeded - info: unsafe { submap_info.assume_init() }, - range: region_base..region_base + region_base, - }) - } } diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs new file mode 100644 index 00000000..3694839e --- /dev/null +++ b/src/mac/task_dumper.rs @@ -0,0 +1,304 @@ +use crate::mac::mach_helpers as mach; +use mach2::mach_types as mt; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum TaskDumpError { + #[error("kernel error {syscall} {error})")] + Kernel { + syscall: &'static str, + error: mach::KernelError, + }, + #[error("detected an invalid mach image header")] + InvalidMachHeader, +} + +/// Wraps a mach call in a Result +macro_rules! mach_call { + ($call:expr) => {{ + // SAFETY: syscall + let kr = unsafe { $call }; + if kr == mach::KERN_SUCCESS { + Ok(()) + } else { + // This is ugly, improvements to the macro welcome! + let mut syscall = stringify!($call); + if let Some(i) = sc.find('(') { + syscall = &syscall[..i]; + } + Err(TaskDumpError::Kernel { + syscall, + error: kr.into(), + }) + } + }}; +} + +// dyld_image_info +#[repr(C)] +pub struct ImageInfo { + load_address: u64, + file_path: u64, + file_mod_date: u64, +} + +impl PartialEq for ImageInfo { + fn eq(&self, o: &Self) -> bool { + self.load_address == o.load_address + } +} + +impl Eq for ImageInfo {} + +impl Ord for ImageInfo { + fn cmp(&self, o: &Self) -> std::cmp::Ordering { + self.load_address.cmp(&o.load_address) + } +} + +impl PartialOrd for ImageInfo { + fn partial_cmp(&self, o: &Self) -> Option { + Some(self.cmp(o)) + } +} + +/// Describes a region of virtual memory +pub struct VMRegionInfo { + pub info: mach::vm_region_submap_info_64, + pub range: std::ops::Range, +} + +/// Similarly to PtraceDumper for Linux, this provides access to information +/// for a task (MacOS process) +pub struct TaskDumper { + task: mt::task_t, + page_size: usize, +} + +impl TaskDumper { + /// Constructs a [`TaskDumper`] for the specified task + pub fn new(task: mt::task_t) -> Self { + Self { + task, + // SAFETY: syscall + page_size: unsafe { libc::getpagesize() }, + } + } + + /// Reads a block of memory from the task + pub fn read_task_memory( + &self, + address: u64, + count: usize, + ) -> Result, TaskDumpError> { + let length = count * std::mem::size_of::(); + + // use the negative of the page size for the mask to find the page address + let page_address = address & -self.page_size; + let last_page_address = (address + length + self.page_size - 1) & -self.page_size; + + let page_size = last_page_address - page_address; + let mut local_start = std::ptr::null_mut(); + let mut local_length = 0; + + mach_call!(mach::mach_vm_read( + self.task, + page_address, + page_size, + &mut local_start, + &mut local_length + ))?; + + let mut buffer = Vec::with_capacity(count); + + let task_buffer = + std::slice::from_raw_parts(local_start.offset(address - page_address).cast(), count); + buffer.extend_from_slice(task_buffer); + + // Don't worry about the return here, if something goes wrong there's probably + // not much we can do about it, and we have what we want anyways + let _res = mach_call!(mach::mach_vm_deallocate( + mach::mach_task_self(), + local_start, + local_length + )); + + Ok(buffer) + } + + /// Reads a null terminated string starting at the specified address. This + /// is a specialization of [`read_task_memory`] since strings can span VM + /// regions. + /// + /// This string is capped at 8k which should never be close to being hit as + /// it is only used for file paths for loaded modules, but then again, this + /// is MacOS, so who knows what insanity goes on. + /// + /// # Errors + /// + /// Fails if the address cannot be read for some reason, or the string is + /// not utf-8. + fn read_string(&self, addr: u64) -> Result, TaskDumpError> { + // The problem is we don't know how much to read until we know how long + // the string is. And we don't know how long the string is, until we've read + // the memory! So, we'll try to read kMaxStringLength bytes + // (or as many bytes as we can until we reach the end of the vm region). + let get_region_size = || { + let region = self.get_vm_region(addr)?; + + let mut size_to_end = region.range.end - addr; + + // If the remaining is less than 4k, check if the next region is + // contiguous, and extend the memory that could contain the string + // to include it + if size_to_end < 4 * 1024 { + let maybe_adjacent = self.get_vm_region(region.range.end)?; + + if maybe_adjacent.range.start == region.range.end { + size_to_end += maybe_adjacent.range.end - maybe_adjacent.range.start; + } + } + + Ok(size_to_end) + }; + + if let Ok(size_to_end) = get_region_size() { + let mut bytes = self.read_task_memory(addr, size_to_end)?; + + // Find the null terminator and truncate our string + if let Some(null_pos) = bytes.iter().position(|c| c == 0) { + bytes.resize(null_pos, 0); + } + + String::from_utf8(bytes).map(Some)? + } else { + Ok(None) + } + } + + /// Retrives information on the virtual memory region the specified address + /// is located within + pub fn get_vm_region(&self, addr: u64) -> Result { + let mut region_base = addr; + let mut region_size = 0; + let mut nesting_level = 0; + let mut region_info = 0; + let mut submap_info = std::mem::MaybeUninit::::uninit(); + + // mach/vm_region.h + const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = + (std::mem::size_of::() / std::mem::size_of::()) + as u32; + + let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; + + mach_call!(mach_vm_region_recurse( + self.task, + &mut region_base, + &mut region_size, + &mut nesting_level, + submap_info.as_mut_ptr().cast(), + &mut info_count, + ))?; + + Ok(VMRegionInfo { + // SAFETY: this will be valid if the syscall succeeded + info: unsafe { submap_info.assume_init() }, + range: region_base..region_base + region_size, + }) + } + + /// Retrieves the state of the specified thread. The state is is an architecture + /// specific block of CPU context ie register state. + pub fn read_thread_state(&self, tid: u32) -> Result { + let mut thread_state = mach::ThreadState::default(); + + mach_call!(mach::thread_get_state( + tid, + THREAD_STATE_FLAVOR, + thread_state.state.as_mut_ptr(), + &mut thread_state.state_size, + ))?; + + Ok(thread_state) + } + + /// Reads the specified task information + pub fn task_info(&self) -> Result { + let mut info = std::mem::MaybeUninit::::uninit(); + let mut count = (std::mem::size_of::() / std::mem::size_of::()) as u32; + + mach_call!(mach::task::task_info( + self.task, + T::FLAVOR, + info.as_mut_ptr().cast(), + &mut count + ))?; + + // SAFETY: this will be initialized if the call succeeded + unsafe { Ok(info.assume_init()) } + } + + /// Retrieves all of the images loaded in the task. Note that there may be + /// multiple images with the same load address. + pub fn read_images(&self) -> Result, TaskDumpError> { + impl mach::TaskInfo for mach::task_info::task_dyld_info { + const FLAVOR: mach::task_info::TASK_DYLD_INFO; + } + + // Retrieve the address at which the list of loaded images is located + // within the task + let all_images_addr = { + let dyld_info = self.task_info::()?; + dyld_info.all_image_info_addr + }; + + // dyld_all_image_infos defined in usr/include/mach-o/dyld_images.h, we + // only need a couple of fields at the beginning + #[repr(C)] + struct AllImagesInfo { + version: u32, // == 1 in Mac OS X 10.4 + info_array_count: u32, + info_array_addr: u64, + } + + // Here we make the assumption that dyld loaded at the same address in + // the crashed process vs. this one. This is an assumption made in + // "dyld_debug.c" and is said to be nearly always valid. + let dyld_all_info_buf = + self.read_task_memory::(all_images_addr, std::mem::size_of::())?; + // SAFETY: this is fine as long as the kernel isn't lying to us + let all_dyld_info: &AllImagesInfo = unsafe { &*(dyld_all_info_buf.as_ptr().cast()) }; + + self.read_task_memory::( + all_dyld_info.info_array_addr, + all_dyld_info.info_array_count as usize, + ) + } + + /// Retrieves the load commands for the specified image + pub fn read_load_commands(&self, img: &ImageInfo) -> Result { + let mach_header_buf = + self.read_task_memory::(img.load_address, std::mem::size_of::())?; + + let header: &mach::MachHeader = &*(mach_header_buf.as_ptr().cast()); + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + // Read the load commands which immediately follow the image header from + // the task memory. Note that load commands vary in size so we need to + // retrieve the memory as a raw byte buffer that we can then iterate + // through and step according to the size of each load command + let load_commands_buf = self.read_task_memory::( + image.load_address + std::mem::size_of::() as u64, + header.size_commands as usize, + )?; + + Ok(mach::LoadComands { + buffer: load_commands_buf, + count: header.num_commands, + }) + } +} From b439d66fba9c7c4e674a1c3c3952918070d7c98a Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 14 Apr 2022 17:29:22 +0200 Subject: [PATCH 06/53] Update branch name for path --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 154d215e..ea98e7a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,5 +49,5 @@ mach2 = "0.4" minidump = "0.10" [patch.crates-io] -minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "master" } +minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "macos" } From 9c0fe449f14c6c0871c245b3cf0e55ce155656d2 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 14 Apr 2022 17:30:35 +0200 Subject: [PATCH 07/53] Make DirSection shared --- src/dir_section.rs | 103 +++++++++++++++++++++++++++++++++++ src/lib.rs | 12 ++-- src/linux/errors.rs | 9 +-- src/linux/minidump_writer.rs | 91 +------------------------------ src/linux/sections.rs | 3 +- src/mac/errors.rs | 8 +-- src/mac/minidump_writer.rs | 84 ---------------------------- 7 files changed, 121 insertions(+), 189 deletions(-) create mode 100644 src/dir_section.rs diff --git a/src/dir_section.rs b/src/dir_section.rs new file mode 100644 index 00000000..63b2e39c --- /dev/null +++ b/src/dir_section.rs @@ -0,0 +1,103 @@ +use crate::{ + mem_writer::{Buffer, MemoryArrayWriter, MemoryWriterError}, + minidump_format::MDRawDirectory, +}; +use std::io::{Error, Seek, SeekFrom, Write}; + +pub type DumpBuf = Buffer; + +#[derive(Debug, thiserror::Error)] +pub enum FileWriterError { + #[error("IO error")] + IOError(#[from] Error), + #[error("Failed to write to memory")] + MemoryWriterError(#[from] MemoryWriterError), +} + +/// Utility that wraps writing minidump directory entries to an I/O stream, generally +/// a [`std::fs::File`]. +#[derive(Debug)] +pub struct DirSection<'a, W> +where + W: Write + Seek, +{ + curr_idx: usize, + section: MemoryArrayWriter, + /// If we have to append to some file, we have to know where we currently are + destination_start_offset: u64, + destination: &'a mut W, + last_position_written_to_file: u64, +} + +impl<'a, W> DirSection<'a, W> +where + W: Write + Seek, +{ + pub fn new( + buffer: &mut DumpBuf, + index_length: u32, + destination: &'a mut W, + ) -> std::result::Result { + let dir_section = + MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; + + Ok(Self { + curr_idx: 0, + section: dir_section, + destination_start_offset: destination.seek(SeekFrom::Current(0))?, + destination, + last_position_written_to_file: 0, + }) + } + + #[inline] + pub fn position(&self) -> u32 { + self.section.position + } + + pub fn dump_dir_entry( + &mut self, + buffer: &mut DumpBuf, + dirent: MDRawDirectory, + ) -> std::result::Result<(), FileWriterError> { + self.section.set_value_at(buffer, dirent, self.curr_idx)?; + + // Now write it to file + + // First get all the positions + let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; + let idx_pos = self.section.location_of_index(self.curr_idx); + self.curr_idx += 1; + + self.destination.seek(std::io::SeekFrom::Start( + self.destination_start_offset + idx_pos.rva as u64, + ))?; + let start = idx_pos.rva as usize; + let end = (idx_pos.rva + idx_pos.data_size) as usize; + self.destination.write_all(&buffer[start..end])?; + + // Reset file-position + self.destination + .seek(std::io::SeekFrom::Start(curr_file_pos))?; + + Ok(()) + } + + /// Writes 2 things to file: + /// 1. The given dirent into the dir section in the header (if any is given) + /// 2. Everything in the in-memory buffer that was added since the last call to this function + pub fn write_to_file( + &mut self, + buffer: &mut DumpBuf, + dirent: Option, + ) -> std::result::Result<(), FileWriterError> { + if let Some(dirent) = dirent { + self.dump_dir_entry(buffer, dirent)?; + } + + let start_pos = self.last_position_written_to_file as usize; + self.destination.write_all(&buffer[start_pos..])?; + self.last_position_written_to_file = buffer.position(); + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 13c5f82e..00ce6074 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,11 @@ cfg_if::cfg_if! { pub mod minidump_cpu; pub mod minidump_format; -/// Non-windows platforms need additional code since they are essentially -/// replicating functionality we get for free on Windows -#[cfg(not(target_os = "windows"))] -pub(crate) mod mem_writer; +// Non-windows platforms need additional code since they are essentially +// replicating functionality we get for free on Windows +cfg_if::cfg_if! { + if #[cfg(not(target_os = "windows"))] { + pub(crate) mod mem_writer; + pub(crate) mod dir_section; + } +} diff --git a/src/linux/errors.rs b/src/linux/errors.rs index 7caba1ce..e423389f 100644 --- a/src/linux/errors.rs +++ b/src/linux/errors.rs @@ -1,3 +1,4 @@ +use crate::dir_section::FileWriterError; use crate::maps_reader::MappingInfo; use crate::mem_writer::MemoryWriterError; use crate::thread_info::Pid; @@ -193,14 +194,6 @@ pub enum SectionDsoDebugError { FromUTF8Error(#[from] std::string::FromUtf8Error), } -#[derive(Debug, Error)] -pub enum FileWriterError { - #[error("IO error")] - IOError(#[from] std::io::Error), - #[error("Failed to write to memory")] - MemoryWriterError(#[from] MemoryWriterError), -} - #[derive(Debug, Error)] pub enum WriterError { #[error("Error during init phase")] diff --git a/src/linux/minidump_writer.rs b/src/linux/minidump_writer.rs index 1510f198..245c6fea 100644 --- a/src/linux/minidump_writer.rs +++ b/src/linux/minidump_writer.rs @@ -1,9 +1,10 @@ use crate::{ + dir_section::{DirSection, DumpBuf}, linux::{ app_memory::AppMemoryList, crash_context::CrashContext, dso_debug, - errors::{FileWriterError, InitError, WriterError}, + errors::{InitError, WriterError}, maps_reader::{MappingInfo, MappingList}, ptrace_dumper::PtraceDumper, sections::*, @@ -12,93 +13,7 @@ use crate::{ mem_writer::{Buffer, MemoryArrayWriter, MemoryWriter, MemoryWriterError}, minidump_format::*, }; -use std::io::{Seek, SeekFrom, Write}; - -pub type DumpBuf = Buffer; - -#[derive(Debug)] -pub struct DirSection<'a, W> -where - W: Write + Seek, -{ - curr_idx: usize, - section: MemoryArrayWriter, - /// If we have to append to some file, we have to know where we currently are - destination_start_offset: u64, - destination: &'a mut W, - last_position_written_to_file: u64, -} - -impl<'a, W> DirSection<'a, W> -where - W: Write + Seek, -{ - fn new( - buffer: &mut DumpBuf, - index_length: u32, - destination: &'a mut W, - ) -> std::result::Result { - let dir_section = - MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; - Ok(DirSection { - curr_idx: 0, - section: dir_section, - destination_start_offset: destination.seek(SeekFrom::Current(0))?, - destination, - last_position_written_to_file: 0, - }) - } - - fn position(&self) -> u32 { - self.section.position - } - - fn dump_dir_entry( - &mut self, - buffer: &mut DumpBuf, - dirent: MDRawDirectory, - ) -> std::result::Result<(), FileWriterError> { - self.section.set_value_at(buffer, dirent, self.curr_idx)?; - - // Now write it to file - - // First get all the positions - let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; - let idx_pos = self.section.location_of_index(self.curr_idx); - self.curr_idx += 1; - - self.destination.seek(std::io::SeekFrom::Start( - self.destination_start_offset + idx_pos.rva as u64, - ))?; - let start = idx_pos.rva as usize; - let end = (idx_pos.rva + idx_pos.data_size) as usize; - self.destination.write_all(&buffer[start..end])?; - - // Reset file-position - self.destination - .seek(std::io::SeekFrom::Start(curr_file_pos))?; - - Ok(()) - } - - /// Writes 2 things to file: - /// 1. The given dirent into the dir section in the header (if any is given) - /// 2. Everything in the in-memory buffer that was added since the last call to this function - fn write_to_file( - &mut self, - buffer: &mut DumpBuf, - dirent: Option, - ) -> std::result::Result<(), FileWriterError> { - if let Some(dirent) = dirent { - self.dump_dir_entry(buffer, dirent)?; - } - - let start_pos = self.last_position_written_to_file as usize; - self.destination.write_all(&buffer[start_pos..])?; - self.last_position_written_to_file = buffer.position(); - Ok(()) - } -} +use std::io::{Seek, Write}; pub enum CrashingThreadContext { None, diff --git a/src/linux/sections.rs b/src/linux/sections.rs index d898ac5e..c7c4172c 100644 --- a/src/linux/sections.rs +++ b/src/linux/sections.rs @@ -7,9 +7,10 @@ pub mod thread_list_stream; pub mod thread_names_stream; use crate::{ + dir_section::DumpBuf, errors::{self}, linux::{ - minidump_writer::{self, DumpBuf, MinidumpWriter}, + minidump_writer::{self, MinidumpWriter}, ptrace_dumper::PtraceDumper, }, mem_writer::*, diff --git a/src/mac/errors.rs b/src/mac/errors.rs index 77830aaa..ea85162b 100644 --- a/src/mac/errors.rs +++ b/src/mac/errors.rs @@ -2,10 +2,10 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum WriterError { - #[error("unable to find a UUID for a module")] - UnknownUuid, - #[error("unable to find the main executable image for the process")] - NoExecutableImage, #[error(transparent)] TaskDumpError(#[from] crate::mac::task_dumper::TaskDumpError), + #[error("Failed to write to memory")] + MemoryWriterError(#[from] crate::mem_writer::MemoryWriterError), + #[error("Failed to write to file")] + FileWriterError(#[from] crate::dir_section::FileWriterError), } diff --git a/src/mac/minidump_writer.rs b/src/mac/minidump_writer.rs index ea8484cc..05293852 100644 --- a/src/mac/minidump_writer.rs +++ b/src/mac/minidump_writer.rs @@ -5,90 +5,6 @@ use std::io::{Seek, Write}; pub type DumpBuf = Buffer; type Result = std::result::Result; -#[derive(Debug)] -pub struct DirSection<'a, W> -where - W: Write + Seek, -{ - curr_idx: usize, - section: MemoryArrayWriter, - /// If we have to append to some file, we have to know where we currently are - destination_start_offset: u64, - destination: &'a mut W, - last_position_written_to_file: u64, -} - -impl<'a, W> DirSection<'a, W> -where - W: Write + Seek, -{ - fn new( - buffer: &mut DumpBuf, - index_length: u32, - destination: &'a mut W, - ) -> std::result::Result { - let dir_section = - MemoryArrayWriter::::alloc_array(buffer, index_length as usize)?; - Ok(DirSection { - curr_idx: 0, - section: dir_section, - destination_start_offset: destination.seek(SeekFrom::Current(0))?, - destination, - last_position_written_to_file: 0, - }) - } - - fn position(&self) -> u32 { - self.section.position - } - - fn dump_dir_entry( - &mut self, - buffer: &mut DumpBuf, - dirent: MDRawDirectory, - ) -> std::result::Result<(), FileWriterError> { - self.section.set_value_at(buffer, dirent, self.curr_idx)?; - - // Now write it to file - - // First get all the positions - let curr_file_pos = self.destination.seek(SeekFrom::Current(0))?; - let idx_pos = self.section.location_of_index(self.curr_idx); - self.curr_idx += 1; - - self.destination.seek(std::io::SeekFrom::Start( - self.destination_start_offset + idx_pos.rva as u64, - ))?; - let start = idx_pos.rva as usize; - let end = (idx_pos.rva + idx_pos.data_size) as usize; - self.destination.write_all(&buffer[start..end])?; - - // Reset file-position - self.destination - .seek(std::io::SeekFrom::Start(curr_file_pos))?; - - Ok(()) - } - - /// Writes 2 things to file: - /// 1. The given dirent into the dir section in the header (if any is given) - /// 2. Everything in the in-memory buffer that was added since the last call to this function - fn write_to_file( - &mut self, - buffer: &mut DumpBuf, - dirent: Option, - ) -> std::result::Result<(), FileWriterError> { - if let Some(dirent) = dirent { - self.dump_dir_entry(buffer, dirent)?; - } - - let start_pos = self.last_position_written_to_file as usize; - self.destination.write_all(&buffer[start_pos..])?; - self.last_position_written_to_file = buffer.position(); - Ok(()) - } -} - pub struct MinidumpWriter { /// The crash context as captured by an exception handler crash_context: crash_context::CrashContext, From c73f1f45bc803f6e6503a0240517ccb5cb482a17 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 14 Apr 2022 17:31:07 +0200 Subject: [PATCH 08/53] Finish up refactoring and actually get it compiling --- src/mac.rs | 2 +- src/mac/{mach_helpers.rs => mach.rs} | 158 ++++++++++++++++------ src/mac/minidump_writer.rs | 59 +++++---- src/mac/streams.rs | 9 +- src/mac/streams/breakpad_info.rs | 40 +++--- src/mac/streams/exception.rs | 58 +++++++++ src/mac/streams/memory_list.rs | 27 ++-- src/mac/streams/misc_info.rs | 188 +++++++++++++++------------ src/mac/streams/module_list.rs | 49 ++++--- src/mac/streams/system_info.rs | 101 +++----------- src/mac/streams/thread_list.rs | 85 ++++++------ src/mac/task_dumper.rs | 114 +++++++++++----- 12 files changed, 531 insertions(+), 359 deletions(-) rename src/mac/{mach_helpers.rs => mach.rs} (80%) create mode 100644 src/mac/streams/exception.rs diff --git a/src/mac.rs b/src/mac.rs index 826b8c17..4f8edd0d 100644 --- a/src/mac.rs +++ b/src/mac.rs @@ -2,7 +2,7 @@ compile_error!("Various MacOS FFI bindings assume we are on a 64-bit architechture"); pub mod errors; -mod mach_helpers; +mod mach; pub mod minidump_writer; mod streams; mod task_dumper; diff --git a/src/mac/mach_helpers.rs b/src/mac/mach.rs similarity index 80% rename from src/mac/mach_helpers.rs rename to src/mac/mach.rs index 70eef097..651a9f7b 100644 --- a/src/mac/mach_helpers.rs +++ b/src/mac/mach.rs @@ -3,11 +3,13 @@ // Just exports all of the mach functions we use into a flat list pub use mach2::{ - kern_return::KERN_SUCCESS, - task, task_info, + kern_return::{kern_return_t, KERN_SUCCESS}, + port::mach_port_name_t, + task::{self, task_threads}, + task_info, thread_act::thread_get_state, traps::mach_task_self, - vm::{mach_vm_deallocate, mach_vm_read}, + vm::{mach_vm_deallocate, mach_vm_read, mach_vm_region_recurse}, vm_region::vm_region_submap_info_64, }; @@ -141,7 +143,7 @@ impl From for KernelError { match kr { KERN_INVALID_ADDRESS => Self::InvalidAddress, - KERN_PROTECTED_FAILURE => Self::ProtectionFailure, + KERN_PROTECTION_FAILURE => Self::ProtectionFailure, KERN_NO_SPACE => Self::NoSpace, KERN_INVALID_ARGUMENT => Self::InvalidArgument, KERN_FAILURE => Self::Failure, @@ -187,8 +189,8 @@ impl From for KernelError { KERN_OPERATION_TIMED_OUT => Self::OperationTimedOut, KERN_CODESIGN_ERROR => Self::CodesignError, KERN_POLICY_STATIC => Self::PoicyStatic, - KERN_INSUFFICIENT_BUFFER_SIZE => Self::InsufficientBufferSize, - KERN_DENIED => Self::Denied, + 52 => Self::InsufficientBufferSize, + 53 => Self::Denied, 54 => Self::MissingKC, 55 => Self::InvalidKC, 56 => Self::NotFound, @@ -208,7 +210,7 @@ cfg_if::cfg_if! { /// x86_THREAD_STATE64 in /usr/include/mach/i386/thread_status.h pub const THREAD_STATE_FLAVOR: u32 = 4; - type ArchTreadState = mach2::structs::x86_thread_state64_t; + pub type ArchThreadState = mach2::structs::x86_thread_state64_t; } else if #[cfg(target_arch = "aarch64")] { /// ARM_THREAD_STATE64 in /usr/include/mach/arm/thread_status.h pub const THREAD_STATE_FLAVOR: u32 = 6; @@ -216,17 +218,17 @@ cfg_if::cfg_if! { // Missing from mach2 atm // _STRUCT_ARM_THREAD_STATE64 from /usr/include/mach/arm/_structs.h #[repr(C)] - struct Arm64ThreadState { - x: [u64; 29], - fp: u64, - lr: u64, - sp: u64, - pc: u64, - cpsr: u32, + pub struct Arm64ThreadState { + pub x: [u64; 29], + pub fp: u64, + pub lr: u64, + pub sp: u64, + pub pc: u64, + pub cpsr: u32, __pad: u32, } - type ArchTreadState = Arm64ThreadState; + pub type ArchThreadState = Arm64ThreadState; } else { compile_error!("unsupported target arch"); } @@ -241,42 +243,41 @@ impl Default for ThreadState { fn default() -> Self { Self { state: [0u32; THREAD_STATE_MAX], - state_size: THREAD_STATE_MAX * std::mem::size_of::() as u32, + state_size: (THREAD_STATE_MAX * std::mem::size_of::()) as u32, } } } impl ThreadState { /// Gets the program counter + #[inline] pub fn pc(&self) -> u64 { cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { - let inner = self.as_ref(); - inner.__pc + self.arch_state().__pc } else if #[cfg(target_arch = "aarch64")] { - let inner = self.as_ref(); - inner.pc + self.arch_state().pc } } } /// Gets the stack pointer + #[inline] pub fn sp(&self) -> u64 { cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { - let inner = self.as_ref(); - inner.__sp + self.arch_state().__sp } else if #[cfg(target_arch = "aarch64")] { - let inner = self.as_ref(); - inner.sp + self.arch_state().sp } } } -} -impl AsRef for ThreadState { - fn as_ref(&self) -> &ArchThreadState { - &*(self.state.as_ptr().cast()) + /// Converts the raw binary blob into the architecture specific state + #[inline] + pub fn arch_state(&self) -> &ArchThreadState { + // SAFETY: hoping the kernel isn't lying + unsafe { &*(self.state.as_ptr().cast()) } } } @@ -292,16 +293,17 @@ pub trait TaskInfo { // usr/include/mach-o/loader.h, the file type for the main executable image const MH_EXECUTE: u32 = 0x2; // usr/include/mach-o/loader.h, magic number for MachHeader -const MH_MAGIC_64: u32 = 0xfeedfacf; +pub const MH_MAGIC_64: u32 = 0xfeedfacf; // usr/include/mach-o/loader.h, command to map a segment -const LC_SEGMENT_64: u32 = 0x19; +pub const LC_SEGMENT_64: u32 = 0x19; // usr/include/mach-o/loader.h, dynamically linked shared lib ident -const LC_ID_DYLIB: u32 = 0xd; +pub const LC_ID_DYLIB: u32 = 0xd; // usr/include/mach-o/loader.h, the uuid -const LC_UUID: u32 = 0x1b; +pub const LC_UUID: u32 = 0x1b; // usr/include/mach-o/loader.h #[repr(C)] +#[derive(Clone)] pub struct MachHeader { pub magic: u32, // mach magic number identifier pub cpu_type: i32, // cpu_type_t cpu specifier @@ -384,15 +386,16 @@ pub struct UuidCommand { /// A block of load commands for a particular image pub struct LoadCommands { /// The block of memory containing all of the load commands - pub buf: Vec, + pub buffer: Vec, /// The number of actual load commmands that _should_ be in the buffer pub count: u32, } impl LoadCommands { - fn iter(&self) -> LoadCommandsIter<'_> { + #[inline] + pub fn iter(&self) -> LoadCommandsIter<'_> { LoadCommandsIter { - buf: &self.buf, + buffer: &self.buffer, count: self.count, } } @@ -430,9 +433,15 @@ impl<'buf> Iterator for LoadCommandsIter<'buf> { } let cmd = match header.cmd { - LC_SEGMENT_64 => Some(&*(self.buffer.as_ptr().cast::())), - LC_ID_DYLIB => Some(&*(self.buffer.as_ptr().cast::())), - LC_UUID => Some(&*(self.buffer.as_ptr().cast::())), + LC_SEGMENT_64 => Some(LoadCommand::Segment( + &*(self.buffer.as_ptr().cast::()), + )), + LC_ID_DYLIB => Some(LoadCommand::Dylib( + &*(self.buffer.as_ptr().cast::()), + )), + LC_UUID => Some(LoadCommand::Uuid( + &*(self.buffer.as_ptr().cast::()), + )), // Just ignore any other load commands _ => None, }; @@ -452,3 +461,76 @@ impl<'buf> Iterator for LoadCommandsIter<'buf> { (sz, Some(sz)) } } + +/// Retrieves an integer sysctl by name. Returns the default value if retrieval +/// fails. +pub fn sysctl_by_name(name: &[u8]) -> T { + let mut out = T::default(); + let mut len = std::mem::size_of_val(&out); + + // SAFETY: syscall + unsafe { + if libc::sysctlbyname( + name.as_ptr().cast(), + (&mut out as *mut T).cast(), + &mut len, + std::ptr::null_mut(), + 0, + ) != 0 + { + // log? + T::default() + } else { + out + } + } +} + +/// Retrieves an `i32` sysctl by name and casts it to the specified integer type. +/// Returns the default value if retrieval fails or the value is out of bounds of +/// the specified integer type. +pub fn int_sysctl_by_name + Default>(name: &[u8]) -> T { + let val = sysctl_by_name::(name); + T::try_from(val).unwrap_or_default() +} + +/// Retrieves a string sysctl by name. Returns an empty string if the retrieval +/// fails or the string can't be converted to utf-8. +pub fn sysctl_string(name: &[u8]) -> String { + let mut buf_len = 0; + + // SAFETY: syscalls + let string_buf = unsafe { + // Retrieve the size of the string (including null terminator) + if libc::sysctlbyname( + name.as_ptr().cast(), + std::ptr::null_mut(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + || buf_len <= 1 + { + return String::new(); + } + + let mut buff = Vec::new(); + buff.resize(buf_len, 0); + + if libc::sysctlbyname( + name.as_ptr().cast(), + buff.as_mut_ptr().cast(), + &mut buf_len, + std::ptr::null_mut(), + 0, + ) != 0 + { + return String::new(); + } + + buff.pop(); // remove null terminator + buff + }; + + String::from_utf8(string_buf).unwrap_or_default() +} diff --git a/src/mac/minidump_writer.rs b/src/mac/minidump_writer.rs index 05293852..6f68a3f2 100644 --- a/src/mac/minidump_writer.rs +++ b/src/mac/minidump_writer.rs @@ -1,16 +1,19 @@ -use crate::mac::errors::WriterError; -use crash_context::CrashContext; +use crate::{ + dir_section::{DirSection, DumpBuf}, + mac::{errors::WriterError, task_dumper::TaskDumper}, + mem_writer::*, + minidump_format::{self, MDMemoryDescriptor, MDRawDirectory, MDRawHeader}, +}; use std::io::{Seek, Write}; -pub type DumpBuf = Buffer; type Result = std::result::Result; pub struct MinidumpWriter { /// The crash context as captured by an exception handler - crash_context: crash_context::CrashContext, + pub(crate) crash_context: crash_context::CrashContext, /// List of raw blocks of memory we've written into the stream. These are /// referenced by other streams (eg thread list) - memory_blocks: Vec, + pub(crate) memory_blocks: Vec, } impl MinidumpWriter { @@ -24,20 +27,25 @@ impl MinidumpWriter { pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result> { let writers = { - let mut writers = vec![ - Self::write_thread_list, - Self::write_memory_list, - Self::write_system_info, - Self::write_module_list, - Self::write_misc_info, - Self::write_breakpad_info, + #[allow(clippy::type_complexity)] + let mut writers: Vec< + Box Result>, + > = vec![ + Box::new(|mw, buffer, dumper| mw.write_thread_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_memory_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_system_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_module_list(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_misc_info(buffer, dumper)), + Box::new(|mw, buffer, dumper| mw.write_breakpad_info(buffer, dumper)), ]; // Exception stream needs to be the last entry in this array as it may // be omitted in the case where the minidump is written without an // exception. if self.crash_context.exception.is_some() { - writers.push_back(Self::write_exception); + writers.push(Box::new(|mw, buffer, dumper| { + mw.write_exception(buffer, dumper) + })); } writers @@ -46,32 +54,35 @@ impl MinidumpWriter { let num_writers = writers.len() as u32; let mut buffer = Buffer::with_capacity(0); - let mut header_section = MemoryWriter::::alloc(buffer)?; - let mut dir_section = DirSection::new(buffer, num_writers, destination)?; + let mut header_section = MemoryWriter::::alloc(&mut buffer)?; + let mut dir_section = DirSection::new(&mut buffer, num_writers, destination)?; let header = MDRawHeader { - signature: MD_HEADER_SIGNATURE, - version: MD_HEADER_VERSION, + signature: minidump_format::MD_HEADER_SIGNATURE, + version: minidump_format::MD_HEADER_VERSION, stream_count: num_writers, stream_directory_rva: dir_section.position(), checksum: 0, /* Can be 0. In fact, that's all that's * been found in minidump files. */ time_date_stamp: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH)? + .duration_since(std::time::UNIX_EPOCH) + .unwrap() .as_secs() as u32, // TODO: This is not Y2038 safe, but thats how its currently defined as flags: 0, }; - header_section.set_value(buffer, header)?; + header_section.set_value(&mut buffer, header)?; // Ensure the header gets flushed. If we crash somewhere below, // we should have a mostly-intact dump - dir_section.write_to_file(buffer, None)?; + dir_section.write_to_file(&mut buffer, None)?; - for writer in writers { - let dirent = writer(self, buffer, dumper)?; - dir_section.write_to_file(buffer, Some(dirent))?; + let dumper = super::task_dumper::TaskDumper::new(self.crash_context.task); + + for mut writer in writers { + let dirent = writer(self, &mut buffer, &dumper)?; + dir_section.write_to_file(&mut buffer, Some(dirent))?; } - Ok(buffer) + Ok(buffer.into()) } } diff --git a/src/mac/streams.rs b/src/mac/streams.rs index 6a87f66b..e1e6d6a4 100644 --- a/src/mac/streams.rs +++ b/src/mac/streams.rs @@ -1,4 +1,5 @@ mod breakpad_info; +mod exception; mod memory_list; mod misc_info; mod module_list; @@ -6,7 +7,9 @@ mod system_info; mod thread_list; use super::{ - minidump_writer::{DumpBuf, MinidumpWriter}, - task_dumper::TaskDumper, + errors::WriterError, + mach, + minidump_writer::MinidumpWriter, + task_dumper::{ImageInfo, TaskDumpError, TaskDumper}, }; -use crate::mac::errors::ker_ret; +use crate::{dir_section::DumpBuf, mem_writer::*, minidump_format::*}; diff --git a/src/mac/streams/breakpad_info.rs b/src/mac/streams/breakpad_info.rs index d0981268..79188998 100644 --- a/src/mac/streams/breakpad_info.rs +++ b/src/mac/streams/breakpad_info.rs @@ -1,24 +1,28 @@ use super::*; use format::{BreakpadInfoValid, MINIDUMP_BREAKPAD_INFO as BreakpadInfo}; -impl MiniDumpWriter { - fn write_breakpad_info(&mut self, buffer: &mut DumpBuf) -> Result { - let mut bp_section = MemoryWriter::::alloc(buffer)?; - let dirent = MDRawDirectory { - stream_type: MDStreamType::BreakpadInfoStream as u32, - location: info_section.location(), - }; - - let bp_info = BreakpadInfo { - validity: BreakpadInfoValid::DumpThreadId.bits() - | BreakpadInfoValid::RequestingThreadId.bits(), - // The thread where the exception port handled the exception, might - // be useful to ignore/deprioritize when processing the minidump - dump_thread_id: self.crash_context.handler_thread, - // The actual thread where the exception was thrown - requesting_thread_id: self.crash_context.thread, - }; +impl MinidumpWriter { + pub(crate) fn write_breakpad_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result { + let bp_section = MemoryWriter::::alloc_with_val( + buffer, + BreakpadInfo { + validity: BreakpadInfoValid::DumpThreadId.bits() + | BreakpadInfoValid::RequestingThreadId.bits(), + // The thread where the exception port handled the exception, might + // be useful to ignore/deprioritize when processing the minidump + dump_thread_id: self.crash_context.handler_thread, + // The actual thread where the exception was thrown + requesting_thread_id: self.crash_context.thread, + }, + )?; - Ok(dirent) + Ok(MDRawDirectory { + stream_type: MDStreamType::BreakpadInfoStream as u32, + location: bp_section.location(), + }) } } diff --git a/src/mac/streams/exception.rs b/src/mac/streams/exception.rs new file mode 100644 index 00000000..ab7507e9 --- /dev/null +++ b/src/mac/streams/exception.rs @@ -0,0 +1,58 @@ +use super::*; + +impl MinidumpWriter { + pub(crate) fn write_exception( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { + let thread_state = dumper.read_thread_state(self.crash_context.thread).ok(); + + let thread_context = if let Some(ts) = &thread_state { + let mut cpu = Default::default(); + Self::fill_cpu_context(ts, &mut cpu); + MemoryWriter::alloc_with_val(buffer, cpu) + .map(|mw| mw.location()) + .ok() + } else { + None + }; + + let exception_record = self + .crash_context + .exception + .as_ref() + .map(|exc| { + let exception_address = if let Some(subcode) = exc.subcode { + subcode as u64 + } else if let Some(ts) = thread_state { + ts.pc() + } else { + 0 + }; + + // The naming is confusing here, but it is how it is + MDException { + exception_code: exc.kind as u32, + exception_flags: exc.code as u32, + exception_address, + ..Default::default() + } + }) + .unwrap_or_default(); + + let stream = MDRawExceptionStream { + thread_id: self.crash_context.thread, + exception_record, + thread_context: thread_context.unwrap_or_default(), + __align: 0, + }; + + let exc_section = MemoryWriter::::alloc_with_val(buffer, stream)?; + + Ok(MDRawDirectory { + stream_type: MDStreamType::ExceptionStream as u32, + location: exc_section.location(), + }) + } +} diff --git a/src/mac/streams/memory_list.rs b/src/mac/streams/memory_list.rs index 78db79e6..240b7ce5 100644 --- a/src/mac/streams/memory_list.rs +++ b/src/mac/streams/memory_list.rs @@ -1,46 +1,46 @@ use super::*; -impl MiniDumpWriter { - fn write_memory_list( +impl MinidumpWriter { + pub(crate) fn write_memory_list( &mut self, buffer: &mut DumpBuf, dumper: &TaskDumper, ) -> Result { // Include some memory around the instruction pointer if the crash was // due to an exception - const IP_MEM_SIZE: usize = 256; + const IP_MEM_SIZE: u64 = 256; - if self.crash_context.exc_info.is_some() { - let get_ip_block = |task, tid| -> Option { - let thread_state = dumper.get_thread_state(tid).ok()?; + if self.crash_context.exception.is_some() { + let get_ip_block = |tid| -> Option> { + let thread_state = dumper.read_thread_state(tid).ok()?; let ip = thread_state.pc(); // Bound it to the upper and lower bounds of the region // it's contained within. If it's not in a known memory region, // don't bother trying to write it. - let region = self.get_vm_region(ip).ok()?; + let region = dumper.get_vm_region(ip).ok()?; - if ip < region.start || ip > region.end { + if ip < region.range.start || ip > region.range.end { return None; } // Try to get IP_MEM_SIZE / 2 bytes before and after the IP, but // settle for whatever's available. - let start = std::cmp::max(region.start, ip - IP_MEM_SIZE / 2); - let end = std::cmp::min(ip + IP_MEM_SIZE / 2, region.end); + let start = std::cmp::max(region.range.start, ip - IP_MEM_SIZE / 2); + let end = std::cmp::min(ip + IP_MEM_SIZE / 2, region.range.end); Some(start..end) }; - if let Some(ip_range) = get_ip_block() { + if let Some(ip_range) = get_ip_block(self.crash_context.thread) { let size = ip_range.end - ip_range.start; - let stack_buffer = dumper.read_task_memory(ip_range.start as _, size)?; + let stack_buffer = dumper.read_task_memory(ip_range.start as _, size as usize)?; let ip_location = MDLocationDescriptor { data_size: size as u32, rva: buffer.position() as u32, }; - buffer.write_all(&stack_buffer)?; + buffer.write_all(&stack_buffer); self.memory_blocks.push(MDMemoryDescriptor { start_of_memory_range: ip_range.start, @@ -61,5 +61,6 @@ impl MiniDumpWriter { MemoryArrayWriter::::alloc_from_array(buffer, &self.memory_blocks)?; dirent.location.data_size += block_list.location().data_size; + Ok(dirent) } } diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 7c76101e..20d42466 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -1,14 +1,15 @@ use super::*; use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; -use std::ffi::c_void; +use std::{ffi::c_void, time::Duration}; #[repr(C)] +#[derive(Copy, Clone)] struct TimeValue { seconds: i32, microseconds: i32, } -impl From for std::time::Duration { +impl From for Duration { fn from(tv: TimeValue) -> Self { let mut seconds = tv.seconds as u64; let mut microseconds = tv.microseconds as u32; @@ -19,7 +20,7 @@ impl From for std::time::Duration { microseconds -= 1000000; } - std::time::Duration::new(seconds, microseconds * 1000) + Duration::new(seconds, microseconds * 1000) } } @@ -34,20 +35,18 @@ struct MachTaskBasicInfo { suspend_count: i32, // suspend count for task } +impl mach::TaskInfo for MachTaskBasicInfo { + const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; +} + #[repr(C)] struct TaskThreadsTimeInfo { user_time: TimeValue, // total user run time for live threads system_time: TimeValue, // total system run time for live threads } -extern "C" { - /// /usr/include/mach/mach_traps.h - /// - /// This seems to be marked as "obsolete" so might disappear at some point? - fn pid_for_task( - task: mach2::port::mach_port_name_t, - pid: *mut i32, - ) -> mach2::kern_return::kern_return_t; +impl mach::TaskInfo for TaskThreadsTimeInfo { + const FLAVOR: u32 = mach::task_info::TASK_THREAD_TIMES_INFO; } #[repr(C)] @@ -153,20 +152,34 @@ struct KInfoProc { kp_eproc: EProc, } -impl MiniDumpWriter { - fn write_misc_info(&mut self, buffer: &mut DumpBuf) -> Result { +impl MinidumpWriter { + pub(crate) fn write_misc_info( + &mut self, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { let mut info_section = MemoryWriter::::alloc(buffer)?; let dirent = MDRawDirectory { stream_type: MDStreamType::MiscInfoStream as u32, location: info_section.location(), }; + let pid = dumper.pid_for_task()?; + let mut misc_info = MDRawMiscInfo { size_of_info: std::mem::size_of::() as u32, flags1: MiscInfoFlags::MINIDUMP_MISC1_PROCESS_ID.bits() | MiscInfoFlags::MINIDUMP_MISC1_PROCESS_TIMES.bits() | MiscInfoFlags::MINIDUMP_MISC1_PROCESSOR_POWER_INFO.bits(), - ..Default::default() + process_id: pid as u32, + process_create_time: 0, + process_user_time: 0, + process_kernel_time: 0, + processor_max_mhz: 0, + processor_current_mhz: 0, + processor_mhz_limit: 0, + processor_max_idle_state: 0, + processor_current_idle_state: 0, }; // Note that Breakpad is using `getrusage` to get process times, but that @@ -176,83 +189,86 @@ impl MiniDumpWriter { // uses to get the information for the actual crashed process which is // far more interesting and relevant // - // SAFETY: syscalls - unsafe { - let mut pid = 0; - kern_ret(|| pid_for_task(self.crash_context.task, &mut pid))?; - - let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_PID, pid]; - let mut kinfo_proc = std::mem::MaybeUninit::::zeroed(); - let mut len = std::mem::size_of::(); - - if libc::sysctl( - mib.as_mut_ptr().cast(), - std::mem::size_of_val(&mib) as u32, - kinfo_proc.as_mut_ptr().cast(), - &mut len, - ) != 0 + // SAFETY: syscall + misc_info.process_create_time = unsafe { + let pid = dumper.pid_for_task()?; + + // Breakpad was using an old method to retrieve this, let's try the + // BSD method instead which is already implemented in libc + let mut proc_info = std::mem::MaybeUninit::::uninit(); + let size = std::mem::size_of::() as i32; + if libc::proc_pidinfo( + pid, + libc::PROC_PIDTBSDINFO, + 0, + proc_info.as_mut_ptr().cast(), + size, + ) == size { - return Err(std::io::Error::last_os_error().into()); - } + let proc_info = proc_info.assume_init(); - let kinfo_proc = kinfo_proc.assume_init(); - - // This sysctl does not return an error if the pid was not found. 10.9.5 - // xnu-2422.115.4/bsd/kern/kern_sysctl.c sysctl_prochandle() calls - // xnu-2422.115.4/bsd/kern/kern_proc.c proc_iterate(), which provides no - // indication of whether anything was done. To catch this, check that the PID - // has changed from the 0 - if kinfo_proc.kp_proc.p_pid == 0 { - return Err(); + proc_info.pbi_start_tvsec as u32 + } else { + 0 } - misc_info.process_create_time = kinfo_proc.kp_proc.starttime.tv_sec as u32; - - // The basic task info keeps the timings for all of the terminated threads - let mut basic_info = std::mem::MaybeUninit::::uninit(); - let mut count = std::mem::size_of::() - / std::mem::size_of::(); - - kern_ret(|| { - mach2::task::task_info( - task, - mach2::task_info::MACH_TASK_BASIC_INFO, - basic_info.as_mut_ptr().cast(), - &mut count, - ) - }) - .ok()?; - - // THe thread times info keeps the timings for all of the living threads - let mut thread_times_info = std::mem::MaybeUninit::::uninit(); - let mut count = std::mem::size_of::() - / std::mem::size_of::(); - - kern_ret(|| { - mach2::task::task_info( - task, - mach2::task_info::TASK_THREAD_TIMES_INFO, - thread_times_info.as_mut_ptr().cast(), - &mut count, - ) - }) - .ok()?; - - let basic_info = basic_info.assume_init(); - let thread_times_info = thread_times_info.assume_init(); - - let user_time: std::time::Duration = - basic_info.user_time.into() + thread_times_info.user_time.into(); - let system_time: std::time::Duration = - basic_info.system_time.into() + thread_times_info.system_time.into(); - - misc_info.process_user_time = user_time.as_secs() as u32; - misc_info.process_kernel_time = system_time.as_secs() as u32; - } + // let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_PID, pid]; + // let mut kinfo_proc = std::mem::MaybeUninit::::zeroed(); + // let mut len = std::mem::size_of::(); + + // if libc::sysctl( + // mib.as_mut_ptr().cast(), + // std::mem::size_of_val(&mib) as u32, + // kinfo_proc.as_mut_ptr().cast(), + // &mut len, + // ) != 0 + // { + // return Err(std::io::Error::last_os_error().into()); + // } + + // let kinfo_proc = kinfo_proc.assume_init(); + + // // This sysctl does not return an error if the pid was not found. 10.9.5 + // // xnu-2422.115.4/bsd/kern/kern_sysctl.c sysctl_prochandle() calls + // // xnu-2422.115.4/bsd/kern/kern_proc.c proc_iterate(), which provides no + // // indication of whether anything was done. To catch this, check that the PID + // // actually matches the one that we requested + // if kinfo_proc.kp_proc.p_pid != pid { + // 0 + // } else { + // kinfo_proc.kp_proc.starttime.tv_sec as u32 + // } + }; + + // The basic task info keeps the timings for all of the terminated threads + let basic_info = dumper.task_info::().ok(); + + // THe thread times info keeps the timings for all of the living threads + let thread_times_info = dumper.task_info::().ok(); + + let user_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.user_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.user_time)) + .unwrap_or_default(); + let system_time = basic_info + .as_ref() + .map(|bi| Duration::from(bi.system_time)) + .unwrap_or_default() + + thread_times_info + .as_ref() + .map(|tt| Duration::from(tt.system_time)) + .unwrap_or_default(); + + misc_info.process_user_time = user_time.as_secs() as u32; + misc_info.process_kernel_time = system_time.as_secs() as u32; // Note that neither of these two keys are present on aarch64, at least atm - let max: u64 = sysctl_by_name(b"hw.cpufrequency_max\0"); - let freq: u64 = sysctl_by_name(b"hw.cpufrequency\0"); + let max: u64 = mach::sysctl_by_name(b"hw.cpufrequency_max\0"); + let freq: u64 = mach::sysctl_by_name(b"hw.cpufrequency\0"); let max = (max / 1000 * 1000) as u32; let current = (freq / 1000 * 1000) as u32; @@ -261,7 +277,7 @@ impl MiniDumpWriter { misc_info.processor_mhz_limit = max; misc_info.processor_current_mhz = current; - info_section.set_value(misc_info); + info_section.set_value(buffer, misc_info)?; Ok(dirent) } diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index 33987057..7d13a6a2 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -1,7 +1,7 @@ use super::*; -impl MiniDumpWriter { - fn write_module_list( +impl MinidumpWriter { + pub(crate) fn write_module_list( &mut self, buffer: &mut DumpBuf, dumper: &TaskDumper, @@ -9,7 +9,7 @@ impl MiniDumpWriter { // The list of modules is pretty critical information, but there could // still be useful information in the minidump without them if we can't // retrieve them for some reason - let modules = self.read_loaded_modules(dumper).unwrap_or_default(); + let modules = self.read_loaded_modules(buffer, dumper).unwrap_or_default(); let list_header = MemoryWriter::::alloc_with_val(buffer, modules.len() as u32)?; @@ -43,7 +43,7 @@ impl MiniDumpWriter { let mut has_main_executable = false; for image in images { - if let Ok((module, is_main_executable)) = self.read_module(image) { + if let Ok((module, is_main_executable)) = self.read_module(image, buf, dumper) { // We want to keep the modules sorted by their load address except // in the case of the main executable image which we want to put // first as it is most likely the culprit, or at least generally @@ -58,9 +58,9 @@ impl MiniDumpWriter { } if !has_main_executable { - Err(WriterError::NoExecutableImage) + Err(TaskDumpError::NoExecutableImage.into()) } else { - Ok(images) + Ok(modules) } } @@ -81,14 +81,14 @@ impl MiniDumpWriter { let mut uuid = None; { - let load_commands = dumper.get_load_commands(&image)?; + let load_commands = dumper.read_load_commands(&image)?; for lc in load_commands.iter() { match lc { mach::LoadCommand::Segment(seg) if sizes.is_none() => { - if seg.segment_name[..7] == b"__TEXT\0" { + if &seg.segment_name[..7] == b"__TEXT\0" { let slide = if seg.file_off == 0 && seg.file_size != 0 { - image.load_address - seg.vm_addr + (image.load_address - seg.vm_addr) as isize } else { 0 }; @@ -101,21 +101,28 @@ impl MiniDumpWriter { } } mach::LoadCommand::Dylib(dylib) if version.is_none() => { - version = Some(dylib.current_version); + version = Some(dylib.dylib.current_version); } mach::LoadCommand::Uuid(img_id) if uuid.is_none() => { uuid = Some(img_id.uuid); } + _ => {} } - if image_sizes.is_some() && image_version.is_some() && image_uuid.is_some() { + if sizes.is_some() && version.is_some() && uuid.is_some() { break; } } } - let image_sizes = image_sizes.ok_or_else(|| WriterError::InvalidMachHeader)?; - let uuid = image_uuid.ok_or_else(|| WriterError::UnknownUuid)?; + let sizes = sizes.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_SEGMENT_64", + id: mach::LC_SEGMENT_64, + })?; + let uuid = uuid.ok_or(TaskDumpError::MissingLoadCommand { + name: "LC_UUID", + id: mach::LC_UUID, + })?; let file_path = if image.file_path != 0 { dumper.read_string(image.file_path)?.unwrap_or_default() @@ -126,15 +133,15 @@ impl MiniDumpWriter { let module_name = write_string_to_location(buf, &file_path)?; let mut raw_module = MDRawModule { - base_of_image: image_sizes.vm_addr + image_sizes.slide, - size_of_image: image_sizes.vm_size as u32, + base_of_image: (sizes.vm_addr as isize + sizes.slide) as u64, + size_of_image: sizes.vm_size as u32, module_name_rva: module_name.rva, ..Default::default() }; // Version info is not available for the main executable image since // it doesn't issue a LC_ID_DYLIB load command - if let Some(version) = &image_version { + if let Some(version) = &version { raw_module.version_info.signature = format::VS_FFI_SIGNATURE; raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; @@ -167,7 +174,7 @@ impl MiniDumpWriter { let cv = MemoryWriter::alloc_with_val( buf, CvInfoPdb { - cv_signature: format::CvSignature::Pdb70, + cv_signature: format::CvSignature::Pdb70 as u32, age: 0, signature: uuid.into(), }, @@ -176,13 +183,13 @@ impl MiniDumpWriter { // Note that we don't use write_string_to_location here as the module // name is a simple 8-bit string, not 16-bit like most other strings // in the minidump, and is directly part of the record itself, not an rva - buf.write_all(module_name.as_bytes())?; - buf.write_all(&[0])?; // null terminator + buf.write_all(module_name.as_bytes()); + buf.write_all(&[0]); // null terminator let mut cv_location = cv.location(); - cv_location.size += module_name.len() as u32 + 1; + cv_location.data_size += module_name.len() as u32 + 1; raw_module.cv_record = cv_location; - Ok((raw_module, image_version.is_none())) + Ok((raw_module, version.is_none())) } } diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs index b20ba27b..7357f719 100644 --- a/src/mac/streams/system_info.rs +++ b/src/mac/streams/system_info.rs @@ -1,72 +1,6 @@ use super::*; use crate::minidump_format::*; -fn sysctl_by_name(name: &[u8]) -> T { - let mut out = T::default(); - let mut len = std::mem::size_of_val(&out); - - // SAFETY: syscall - unsafe { - if libc::sysctlbyname( - name.as_ptr().cast(), - (&mut out).cast(), - &mut len, - std::ptr::null_mut(), - 0, - ) != 0 - { - // log? - T::default() - } else { - out - } - } -} - -fn int_sysctl_by_name + Default>(name: &[u8]) -> T { - let val = sysctl_by_name::(name); - T::try_from(val).unwrap_or_default() -} - -fn sysctl_string(name: &[u8]) -> String { - let mut buf_len = 0; - - // SAFETY: syscalls - let string_buf = unsafe { - // Retrieve the size of the string (including null terminator) - if libc::sysctlbyname( - name.as_ptr().cast(), - std::ptr::null_mut(), - &mut buf_len, - std::ptr::null_mut(), - 0, - ) != 0 - || buf_len <= 1 - { - return String::new(); - } - - let mut buff = Vec::new(); - buff.resize(buf_len, 0); - - if libc::sysctlbyname( - name.as_ptr().cast(), - buff.as_mut_ptr().cast(), - &mut buf_len, - std::ptr::null_mut(), - 0, - ) != 0 - { - return String::new(); - } - - buff.pop(); // remove null terminator - buff - }; - - String::from_utf8(string_buf).unwrap_or_default() -} - /// Retrieve the OS version information. /// /// Note that this only works on 10.13.4+, but that release is over 4 years old @@ -75,7 +9,7 @@ fn sysctl_string(name: &[u8]) -> String { /// Note that Breakpad/Crashpad use a private API in CoreFoundation to do this /// via _CFCopySystemVersionDictionary->_kCFSystemVersionProductVersionKey fn os_version() -> (u32, u32, u32) { - let vers = sysctl_string(b"kern.osproductversion\0"); + let vers = mach::sysctl_string(b"kern.osproductversion\0"); let inner = || { let mut it = vers.split('.'); @@ -99,7 +33,7 @@ fn os_version() -> (u32, u32, u32) { /// its value versus the output of the `sw_vers -buildVersion` command #[inline] fn build_version() -> String { - sysctl_string(b"kern.osversion\0") + mach::sysctl_string(b"kern.osversion\0") } /// Retrieves more detailed information on the cpu. @@ -111,8 +45,8 @@ fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { return; } - let mut md_feats = 1 << 2 /*PF_COMPARE_EXCHANGE_DOUBLE*/; - let features: u64 = sysctl_by_name(b"machdep.cpu.feature_bits\0"); + let mut md_feats: u64 = 1 << 2 /*PF_COMPARE_EXCHANGE_DOUBLE*/; + let features: u64 = mach::sysctl_by_name(b"machdep.cpu.feature_bits\0"); // Map the cpuid feature to its equivalent minidump cpu feature. // See https://en.wikipedia.org/wiki/CPUID for where the values for the @@ -158,7 +92,7 @@ fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { 28 /* PF_RDRAND_INSTRUCTION_AVAILABLE */ ); - let ext_features: u64 = sysctl_by_name(b"machdep.cpu.extfeature_bits\0"); + let ext_features: u64 = mach::sysctl_by_name(b"machdep.cpu.extfeature_bits\0"); map_feature!( ext_features, @@ -171,7 +105,7 @@ fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { 7 /* PF_3DNOW_INSTRUCTIONS_AVAILABLE */ ); - let leaf_features: u32 = sysctl_by_name(b"machdep.cpu.leaf7_feature_bits\0"); + let leaf_features: u32 = mach::sysctl_by_name(b"machdep.cpu.leaf7_feature_bits\0"); map_feature!( leaf_features, 0, /* F7_FSGSBASE */ @@ -187,18 +121,22 @@ fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { // minidump_common::format::OtherCpuInfo is just 2 adjacent u64's, we only // set the first, so just do a direct write to the bytes - cpu[..std::mem::size_of::()].copy_from_slice(md_feats.to_ne_bytes()); + cpu.data[..std::mem::size_of::()].copy_from_slice(&md_feats.to_ne_bytes()); } -impl MiniDumpWriter { - fn write_system_info(&mut self, buffer: &mut DumpBuf) -> Result { +impl MinidumpWriter { + pub(crate) fn write_system_info( + &mut self, + buffer: &mut DumpBuf, + _dumper: &TaskDumper, + ) -> Result { let mut info_section = MemoryWriter::::alloc(buffer)?; let dirent = MDRawDirectory { stream_type: MDStreamType::SystemInfoStream as u32, location: info_section.location(), }; - let number_of_processors: u8 = int_sysctl_by_name(b"hw.ncpu\0"); + let number_of_processors: u8 = mach::int_sysctl_by_name(b"hw.ncpu\0"); // SAFETY: POD buffer let mut cpu: format::CPU_INFORMATION = unsafe { std::mem::zeroed() }; read_cpu_info(&mut cpu); @@ -210,15 +148,15 @@ impl MiniDumpWriter { // machdep.cpu.family and machdep.cpu.model already take the extended family // and model IDs into account. See 10.9.2 xnu-2422.90.20/osfmk/i386/cpuid.c // cpuid_set_generic_info(). - let processor_level: u16 = int_sysctl_by_name(b"machdep.cpu.family\0"); - let model: u8 = int_sysctl_by_name(b"machdep.cpu.model\0"); - let stepping: u8 = int_sysctl_by_name(b"machdep.cpu.stepping\0"); + let processor_level: u16 = mach::int_sysctl_by_name(b"machdep.cpu.family\0"); + let model: u8 = mach::int_sysctl_by_name(b"machdep.cpu.model\0"); + let stepping: u8 = mach::int_sysctl_by_name(b"machdep.cpu.stepping\0"); let processor_revision: u16 = (model << 8) | stepping; } else if #[cfg(target_arch = "aarch64")] { let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64; - let family: u32 = sysctl_by_name(b"hw.cpufamily\0"); + let family: u32 = mach::sysctl_by_name(b"hw.cpufamily\0"); let processor_level = (family & 0xffff0000 >> 16) as u16; let processor_revision = (family & 0x0000ffff) as u16; @@ -236,11 +174,10 @@ impl MiniDumpWriter { processor_level, processor_revision, number_of_processors, - product_type, cpu, // OS - platform_id: PlatformId::MacOs, + platform_id: PlatformId::MacOs as u32, product_type: 1, // VER_NT_WORKSTATION, could also be VER_NT_SERVER but...seriously? major_version, minor_version, diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index ffd5872a..5306ea00 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -1,24 +1,20 @@ use super::*; +use crate::minidump_cpu::RawContextCPU; impl MinidumpWriter { - fn write_thread_list( + pub(crate) fn write_thread_list( &mut self, buffer: &mut DumpBuf, dumper: &TaskDumper, ) -> Result { - // Retrieve the list of threads from the task that crashed. - // SAFETY: syscall - let mut threads = std::ptr::null_mut(); - let mut thread_count = 0; - - kern_ret(|| unsafe { - mach2::task::task_threads(self.crash_context.task, &mut threads, &mut thread_count) - })?; + let threads = dumper.read_threads()?; // Ignore the thread that handled the exception - if self.crash_context.handler_thread != mach2::port::MACH_PORT_NULL { - thread_count -= 1; - } + let thread_count = if self.crash_context.handler_thread != mach2::port::MACH_PORT_NULL { + threads.len() - 1 + } else { + threads.len() + }; let list_header = MemoryWriter::::alloc_with_val(buffer, thread_count as u32)?; @@ -27,20 +23,28 @@ impl MinidumpWriter { location: list_header.location(), }; - let mut thread_list = MemoryArrayWriter::::alloc_array(buffer, num_threads)?; + let mut thread_list = MemoryArrayWriter::::alloc_array(buffer, thread_count)?; dirent.location.data_size += thread_list.location().data_size; - let threads = unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }; - - for (i, tid) in threads.iter().enumerate() { - let thread = self.write_thread(buffer, tid)?; + let handler_thread = self.crash_context.handler_thread; + for (i, tid) in threads + .iter() + .filter(|tid| **tid != handler_thread) + .enumerate() + { + let thread = self.write_thread(*tid, buffer, dumper)?; thread_list.set_value_at(buffer, thread, i)?; } Ok(dirent) } - fn write_thread(&mut self, buffer: &mut DumpBuf, tid: u32) -> Result { + fn write_thread( + &mut self, + tid: u32, + buffer: &mut DumpBuf, + dumper: &TaskDumper, + ) -> Result { let mut thread = MDRawThread { thread_id: tid, suspend_count: 0, @@ -51,12 +55,12 @@ impl MinidumpWriter { thread_context: MDLocationDescriptor::default(), }; - let thread_state = Self::get_thread_state(tid)?; + let thread_state = dumper.read_thread_state(tid)?; - self.write_stack_from_start_address(thread_state.sp(), buffer, &mut thread)?; + self.write_stack_from_start_address(thread_state.sp(), &mut thread, buffer, dumper)?; let mut cpu: RawContextCPU = Default::default(); - Self::fill_cpu_context(thread_state, &mut cpu); + Self::fill_cpu_context(&thread_state, &mut cpu); let cpu_section = MemoryWriter::alloc_with_val(buffer, cpu)?; thread.thread_context = cpu_section.location(); Ok(thread) @@ -65,14 +69,15 @@ impl MinidumpWriter { fn write_stack_from_start_address( &mut self, start: u64, - buffer: &mut DumpBuf, thread: &mut MDRawThread, + buffer: &mut DumpBuf, + dumper: &TaskDumper, ) -> Result<(), WriterError> { - thread.stack.start_of_memory_range = start.try_into()?; + thread.stack.start_of_memory_range = start; thread.stack.memory.data_size = 0; thread.stack.memory.rva = buffer.position() as u32; - let stack_size = self.calculate_stack_size(start); + let stack_size = self.calculate_stack_size(start, dumper); let stack_location = if stack_size == 0 { // In some situations the stack address for the thread can come back 0. @@ -84,16 +89,16 @@ impl MinidumpWriter { data_size: 16, rva: buffer.position() as u32, }; - buffer.write_all(0xdeadbeefu64.as_ne_bytes())?; - buffer.write_all(0xdeadbeefu64.as_ne_bytes())?; + buffer.write_all(&0xdeadbeefu64.to_ne_bytes()); + buffer.write_all(&0xdeadbeefu64.to_ne_bytes()); stack_location } else { - let stack_buffer = self.read_task_memory(start, stack_size)?; + let stack_buffer = dumper.read_task_memory(start, stack_size)?; let stack_location = MDLocationDescriptor { data_size: stack_buffer.len() as u32, rva: buffer.position() as u32, }; - buffer.write_all(&stack_buffer)?; + buffer.write_all(&stack_buffer); stack_location }; @@ -102,12 +107,12 @@ impl MinidumpWriter { Ok(()) } - fn calculate_stack_size(&self, start_address: u64) -> usize { + fn calculate_stack_size(&self, start_address: u64, dumper: &TaskDumper) -> usize { if start_address == 0 { return 0; } - let mut region = if let Ok(region) = self.get_vm_region(start_address) { + let mut region = if let Ok(region) = dumper.get_vm_region(start_address) { region } else { return 0; @@ -119,6 +124,9 @@ impl MinidumpWriter { return 0; } + let root_range_start = region.range.start; + let mut stack_size = region.range.end - region.range.start; + // If the user tag is VM_MEMORY_STACK, look for more readable regions with // the same tag placed immediately above the computed stack region. Under // some circumstances, the stack for thread 0 winds up broken up into @@ -129,7 +137,7 @@ impl MinidumpWriter { loop { let proposed_next_region_base = region.range.end; - region = if let Ok(reg) = self.get_vm_region(region.range.end) { + region = if let Ok(reg) = dumper.get_vm_region(region.range.end) { reg } else { break; @@ -142,20 +150,23 @@ impl MinidumpWriter { break; } - stack_region_size += region.range.end - region.range.start; + stack_size += region.range.end - region.range.start; } } - stack_region_base + stack_region_size - start_addr + (root_range_start + stack_size - start_address) as usize } - fn fill_cpu_context(thread_state: &ThreadState, out: &mut RawContextCPU) { + pub(crate) fn fill_cpu_context( + thread_state: &crate::mac::mach::ThreadState, + out: &mut RawContextCPU, + ) { + let ts = thread_state.arch_state(); + cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { out.context_flags = format::ContextFlagsCpu::CONTEXT_AMD64.bits(); - let ts = thread_state.as_ref(); - out.rax = ts.__rax; out.rbx = ts.__rbx; out.rcx = ts.__rcx; @@ -185,8 +196,6 @@ impl MinidumpWriter { // This is kind of a lie as we don't actually include the full float state..? out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; - let ts = thread_state.as_ref(); - out.cpsr = ts.cpsr; out.iregs[..28].copy_from_slice(&ts.x[..28]); out.iregs[29] = ts.fp; diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs index 3694839e..cd7071ed 100644 --- a/src/mac/task_dumper.rs +++ b/src/mac/task_dumper.rs @@ -1,4 +1,4 @@ -use crate::mac::mach_helpers as mach; +use crate::mac::mach; use mach2::mach_types as mt; use thiserror::Error; @@ -11,6 +11,12 @@ pub enum TaskDumpError { }, #[error("detected an invalid mach image header")] InvalidMachHeader, + #[error(transparent)] + NonUtf8String(#[from] std::string::FromUtf8Error), + #[error("unable to find the main executable image for the process")] + NoExecutableImage, + #[error("expected load command {name}({id}) was not found for an image")] + MissingLoadCommand { name: &'static str, id: u32 }, } /// Wraps a mach call in a Result @@ -23,7 +29,7 @@ macro_rules! mach_call { } else { // This is ugly, improvements to the macro welcome! let mut syscall = stringify!($call); - if let Some(i) = sc.find('(') { + if let Some(i) = syscall.find('(') { syscall = &syscall[..i]; } Err(TaskDumpError::Kernel { @@ -36,10 +42,11 @@ macro_rules! mach_call { // dyld_image_info #[repr(C)] +#[derive(Clone)] pub struct ImageInfo { - load_address: u64, - file_path: u64, - file_mod_date: u64, + pub load_address: u64, + pub file_path: u64, + pub file_mod_date: u64, } impl PartialEq for ImageInfo { @@ -72,7 +79,7 @@ pub struct VMRegionInfo { /// for a task (MacOS process) pub struct TaskDumper { task: mt::task_t, - page_size: usize, + page_size: i64, } impl TaskDumper { @@ -81,24 +88,24 @@ impl TaskDumper { Self { task, // SAFETY: syscall - page_size: unsafe { libc::getpagesize() }, + page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as i64, } } /// Reads a block of memory from the task - pub fn read_task_memory( - &self, - address: u64, - count: usize, - ) -> Result, TaskDumpError> { - let length = count * std::mem::size_of::(); + pub fn read_task_memory(&self, address: u64, count: usize) -> Result, TaskDumpError> + where + T: Sized + Clone, + { + let length = (count * std::mem::size_of::()) as u64; // use the negative of the page size for the mask to find the page address - let page_address = address & -self.page_size; - let last_page_address = (address + length + self.page_size - 1) & -self.page_size; + let page_address = address & (-self.page_size as u64); + let last_page_address = + (address + length + (self.page_size - 1) as u64) & (-self.page_size as u64); let page_size = last_page_address - page_address; - let mut local_start = std::ptr::null_mut(); + let mut local_start = 0; let mut local_length = 0; mach_call!(mach::mach_vm_read( @@ -111,16 +118,23 @@ impl TaskDumper { let mut buffer = Vec::with_capacity(count); - let task_buffer = - std::slice::from_raw_parts(local_start.offset(address - page_address).cast(), count); + // SAFETY: this is safe as long as the kernel has not lied to us + let task_buffer = unsafe { + std::slice::from_raw_parts( + (local_start as *const u8) + .offset((address - page_address) as isize) + .cast(), + count, + ) + }; buffer.extend_from_slice(task_buffer); // Don't worry about the return here, if something goes wrong there's probably // not much we can do about it, and we have what we want anyways let _res = mach_call!(mach::mach_vm_deallocate( mach::mach_task_self(), - local_start, - local_length + local_start as u64, // vm_read returns a pointer, but vm_deallocate takes a integer address :-/ + local_length as u64, // vm_read and vm_deallocate use different sizes :-/ )); Ok(buffer) @@ -138,12 +152,12 @@ impl TaskDumper { /// /// Fails if the address cannot be read for some reason, or the string is /// not utf-8. - fn read_string(&self, addr: u64) -> Result, TaskDumpError> { + pub fn read_string(&self, addr: u64) -> Result, TaskDumpError> { // The problem is we don't know how much to read until we know how long // the string is. And we don't know how long the string is, until we've read // the memory! So, we'll try to read kMaxStringLength bytes // (or as many bytes as we can until we reach the end of the vm region). - let get_region_size = || { + let get_region_size = || -> Result { let region = self.get_vm_region(addr)?; let mut size_to_end = region.range.end - addr; @@ -163,14 +177,14 @@ impl TaskDumper { }; if let Ok(size_to_end) = get_region_size() { - let mut bytes = self.read_task_memory(addr, size_to_end)?; + let mut bytes = self.read_task_memory(addr, size_to_end as usize)?; // Find the null terminator and truncate our string - if let Some(null_pos) = bytes.iter().position(|c| c == 0) { + if let Some(null_pos) = bytes.iter().position(|c| *c == 0) { bytes.resize(null_pos, 0); } - String::from_utf8(bytes).map(Some)? + Ok(String::from_utf8(bytes).map(Some)?) } else { Ok(None) } @@ -182,7 +196,6 @@ impl TaskDumper { let mut region_base = addr; let mut region_size = 0; let mut nesting_level = 0; - let mut region_info = 0; let mut submap_info = std::mem::MaybeUninit::::uninit(); // mach/vm_region.h @@ -192,7 +205,7 @@ impl TaskDumper { let mut info_count = VM_REGION_SUBMAP_INFO_COUNT_64; - mach_call!(mach_vm_region_recurse( + mach_call!(mach::mach_vm_region_recurse( self.task, &mut region_base, &mut region_size, @@ -215,7 +228,7 @@ impl TaskDumper { mach_call!(mach::thread_get_state( tid, - THREAD_STATE_FLAVOR, + mach::THREAD_STATE_FLAVOR as i32, thread_state.state.as_mut_ptr(), &mut thread_state.state_size, ))?; @@ -243,7 +256,7 @@ impl TaskDumper { /// multiple images with the same load address. pub fn read_images(&self) -> Result, TaskDumpError> { impl mach::TaskInfo for mach::task_info::task_dyld_info { - const FLAVOR: mach::task_info::TASK_DYLD_INFO; + const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; } // Retrieve the address at which the list of loaded images is located @@ -277,11 +290,10 @@ impl TaskDumper { } /// Retrieves the load commands for the specified image - pub fn read_load_commands(&self, img: &ImageInfo) -> Result { - let mach_header_buf = - self.read_task_memory::(img.load_address, std::mem::size_of::())?; + pub fn read_load_commands(&self, img: &ImageInfo) -> Result { + let mach_header = self.read_task_memory::(img.load_address, 1)?; - let header: &mach::MachHeader = &*(mach_header_buf.as_ptr().cast()); + let header = &mach_header[0]; if header.magic != mach::MH_MAGIC_64 { return Err(TaskDumpError::InvalidMachHeader); @@ -292,13 +304,45 @@ impl TaskDumper { // retrieve the memory as a raw byte buffer that we can then iterate // through and step according to the size of each load command let load_commands_buf = self.read_task_memory::( - image.load_address + std::mem::size_of::() as u64, + img.load_address + std::mem::size_of::() as u64, header.size_commands as usize, )?; - Ok(mach::LoadComands { + Ok(mach::LoadCommands { buffer: load_commands_buf, count: header.num_commands, }) } + + /// Gets a list of all of the thread ids in the task + pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> { + let mut threads = std::ptr::null_mut(); + let mut thread_count = 0; + + mach_call!(mach::task_threads( + self.task, + &mut threads, + &mut thread_count + ))?; + + Ok( + // SAFETY: This should be valid if the call succeeded + unsafe { std::slice::from_raw_parts(threads, thread_count as usize) }, + ) + } + + /// Retrieves the PID for the task + pub fn pid_for_task(&self) -> Result { + extern "C" { + /// /usr/include/mach/mach_traps.h + /// + /// This seems to be marked as "obsolete" so might disappear at some point? + fn pid_for_task(task: mach::mach_port_name_t, pid: *mut i32) -> mach::kern_return_t; + } + + let mut pid = 0; + mach_call!(pid_for_task(self.task, &mut pid))?; + + Ok(pid) + } } From cc84ce91ab180c60cd96c3f0e31fc083665ace58 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 08:08:46 +0200 Subject: [PATCH 09/53] Add macos dump test --- src/bin/test.rs | 23 +++++++++ tests/mac_minidump_writer.rs | 92 ++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 tests/mac_minidump_writer.rs diff --git a/src/bin/test.rs b/src/bin/test.rs index 8a8f4095..83925f79 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -341,6 +341,27 @@ mod windows { } } +#[cfg(target_os = "macos")] +mod mac { + use super::*; + use std::mem; + + #[inline(never)] + pub(super) fn real_main(_args: Vec) -> Result<()> { + unsafe { + let task = mach2::traps::mach_task_self(); + let thread = mach2::mach_init::mach_thread_self(); + + println!("{task} {thread}"); + + // Wait until we're killed + loop { + std::thread::park(); + } + } + } +} + fn main() -> Result<()> { let args: Vec<_> = std::env::args().skip(1).collect(); @@ -349,6 +370,8 @@ fn main() -> Result<()> { linux::real_main(args) } else if #[cfg(target_os = "windows")] { windows::real_main(args) + } else if #[cfg(target_os = "macos")] { + mac::real_main(args) } else { unimplemented!(); } diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs new file mode 100644 index 00000000..c2b5e262 --- /dev/null +++ b/tests/mac_minidump_writer.rs @@ -0,0 +1,92 @@ +#![cfg(target_os = "macos")] + +mod common; +use common::start_child_and_return; + +use minidump::{ + CrashReason, Minidump, MinidumpBreakpadInfo, MinidumpMemoryList, MinidumpMiscInfo, + MinidumpModuleList, MinidumpSystemInfo, MinidumpThreadList, +}; +use minidump_writer::minidump_writer::MinidumpWriter; + +fn get_crash_reason<'a, T: std::ops::Deref + 'a>( + md: &Minidump<'a, T>, +) -> CrashReason { + let exc: minidump::MinidumpException<'_> = + md.get_stream().expect("unable to find exception stream"); + + exc.get_crash_reason( + minidump::system_info::Os::MacOs, + if cfg!(target_arch = "x86_64") { + minidump::system_info::Cpu::X86_64 + } else if cfg!(target_arch = "aarch64") { + minidump::system_info::Cpu::Arm64 + } else { + unimplemented!() + }, + ) +} + +#[test] +fn dump_external_process() { + use std::io::BufRead; + + let mut child = start_child_and_return(""); + + let (task, thread) = { + let mut f = std::io::BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); + let mut buf = String::new(); + f.read_line(&mut buf).expect("failed to read stdout"); + assert!(!buf.is_empty()); + + let mut biter = buf.trim().split(' '); + + let task: u32 = biter.next().unwrap().parse().unwrap(); + let thread: u32 = biter.next().unwrap().parse().unwrap(); + + (task, thread) + }; + + let crash_context = crash_context::CrashContext { + task, + thread, + handler_thread: mach2::port::MACH_PORT_NULL, + exception: Some(crash_context::ExceptionInfo { + kind: mach2::exception_types::EXC_BREAKPOINT as i32, + code: 100, + subcode: None, + }), + }; + + let mut tmpfile = tempfile::Builder::new() + .prefix("mac_external_process") + .tempfile() + .unwrap(); + + let mut dumper = MinidumpWriter::new(crash_context); + + dumper + .dump(tmpfile.as_file_mut()) + .expect("failed to write minidump"); + + child.kill().expect("failed to kill child"); + + let md = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); + + let _: MinidumpModuleList = md.get_stream().expect("Couldn't find MinidumpModuleList"); + let _: MinidumpThreadList = md.get_stream().expect("Couldn't find MinidumpThreadList"); + let _: MinidumpMemoryList = md.get_stream().expect("Couldn't find MinidumpMemoryList"); + let _: MinidumpSystemInfo = md.get_stream().expect("Couldn't find MinidumpSystemInfo"); + let _: MinidumpBreakpadInfo = md.get_stream().expect("Couldn't find MinidumpBreakpadInfo"); + let _: MinidumpMiscInfo = md.get_stream().expect("Couldn't find MinidumpMiscInfo"); + + let crash_reason = get_crash_reason(&md); + + assert!(matches!( + crash_reason, + CrashReason::MacGeneral( + minidump_common::errors::ExceptionCodeMac::EXC_BREAKPOINT, + 100 + ) + )); +} From 22b5d7f07f365f4eca8e18bd40b634c63ddf0617 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 08:11:26 +0200 Subject: [PATCH 10/53] Build mac in CI --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6c734ba..805c2003 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,16 +21,16 @@ jobs: - { os: ubuntu-latest , target: x86_64-unknown-linux-gnu, use-cross: false } - { os: ubuntu-latest, target: x86_64-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: i686-unknown-linux-gnu, use-cross: true } - #- { os: ubuntu-latest, target: i686-unknown-linux-musl, use-cross: true } + #- { os: ubuntu-latest, target: i686-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: aarch64-unknown-linux-gnu, use-cross: true } - { os: ubuntu-latest, target: aarch64-unknown-linux-musl, use-cross: true } - { os: ubuntu-latest, target: aarch64-linux-android, use-cross: true } - { os: ubuntu-latest, target: arm-unknown-linux-gnueabi, use-cross: true } - - { os: ubuntu-latest, target: arm-unknown-linux-musleabi, use-cross: true } + - { os: ubuntu-latest, target: arm-unknown-linux-musleabi, use-cross: true } - { os: ubuntu-latest, target: arm-linux-androideabi, use-cross: true } - { os: ubuntu-latest, target: arm-unknown-linux-gnueabihf, use-cross: true } - - { os: windows-2022, target: x86_64-pc-windows-msvc, use-cross: false } - #- { os: macos-latest, target: x86_64-apple-darwin, use-cross: false } + - { os: windows-2022, target: x86_64-pc-windows-msvc, use-cross: false } + - { os: macos-latest, target: x86_64-apple-darwin, use-cross: false } steps: - name: Checkout repository uses: actions/checkout@v2 From a721515efb498a97ab361a3eba358e8095e24cb3 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 08:59:40 +0200 Subject: [PATCH 11/53] Fixup x86_64 --- src/mac/mach.rs | 4 ++-- src/mac/streams/system_info.rs | 2 +- src/mac/streams/thread_list.rs | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mac/mach.rs b/src/mac/mach.rs index 651a9f7b..de5031fb 100644 --- a/src/mac/mach.rs +++ b/src/mac/mach.rs @@ -254,7 +254,7 @@ impl ThreadState { pub fn pc(&self) -> u64 { cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { - self.arch_state().__pc + self.arch_state().__rip } else if #[cfg(target_arch = "aarch64")] { self.arch_state().pc } @@ -266,7 +266,7 @@ impl ThreadState { pub fn sp(&self) -> u64 { cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { - self.arch_state().__sp + self.arch_state().__rsp } else if #[cfg(target_arch = "aarch64")] { self.arch_state().sp } diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs index 7357f719..0fbbc030 100644 --- a/src/mac/streams/system_info.rs +++ b/src/mac/streams/system_info.rs @@ -152,7 +152,7 @@ impl MinidumpWriter { let model: u8 = mach::int_sysctl_by_name(b"machdep.cpu.model\0"); let stepping: u8 = mach::int_sysctl_by_name(b"machdep.cpu.stepping\0"); - let processor_revision: u16 = (model << 8) | stepping; + let processor_revision = ((model as u16) << 8) | stepping as u16; } else if #[cfg(target_arch = "aarch64")] { let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64; diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index 5306ea00..5fd2c771 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -189,9 +189,9 @@ impl MinidumpWriter { // specifies 32 bits for the flags register, we can truncate safely // with no loss. out.eflags = ts.__rflags as _; - out.cs = ts.__cs; - out.fs = ts.__fs; - out.gs = ts.__gs; + out.cs = ts.__cs as u16; + out.fs = ts.__fs as u16; + out.gs = ts.__gs as u16; } else if #[cfg(target_arch = "aarch64")] { // This is kind of a lie as we don't actually include the full float state..? out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; From c166609b4dee4a895c2d5c572c9ce0d54b8c3439 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 09:38:51 +0200 Subject: [PATCH 12/53] Remove unused import --- src/bin/test.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index 83925f79..802b7303 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -344,7 +344,6 @@ mod windows { #[cfg(target_os = "macos")] mod mac { use super::*; - use std::mem; #[inline(never)] pub(super) fn real_main(_args: Vec) -> Result<()> { From 382526624623d0f72045c754fec836d605dc9212 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 09:39:03 +0200 Subject: [PATCH 13/53] Verify we get valid misc info --- src/mac/streams/misc_info.rs | 140 ++--------------------------------- tests/mac_minidump_writer.rs | 43 +++++++++-- 2 files changed, 43 insertions(+), 140 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 20d42466..2dc49c78 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -49,109 +49,6 @@ impl mach::TaskInfo for TaskThreadsTimeInfo { const FLAVOR: u32 = mach::task_info::TASK_THREAD_TIMES_INFO; } -#[repr(C)] -struct VmSpace { - dummy: i32, - dummy2: *const u8, - dummy3: [i32; 5], - dummy4: [*const u8; 3], -} - -#[repr(C)] -struct ExternProc { - starttime: libc::timeval, // process start time, actually a union, but that's an implementation detail - vmspace: *const VmSpace, // Address space - sigacts: *const u8, // Signal actions, state (PROC ONLY) - flag: i32, // P_* flags - stat: i8, // S* process status - pid: libc::pid_t, // pid - oppid: libc::pid_t, // save parent pid during ptrace - dupfd: i32, // sideways return value from fdopen - /* Mach related */ - user_stack: *const u8, // where user stack was allocated, - exit_thread: *const c_void, // Which thread is exiting? - debugger: i32, // allow to debug - sigwait: i32, // indication to suspend - /* scheduling */ - estcpu: u32, // time averaged value of cpticks - cpticks: i32, // tick of cpu time - pctcpu: u32, // %cpu for this process during swtime - wchan: *const c_void, // sleep address - wmesg: *const i8, // reason for sleep - swtime: u32, // time swapped in or out - slptime: u32, // time since last blocked - realtimer: libc::itimerval, // alarm timer - rtime: libc::timeval, // real time - uticks: u64, // statclock hits in user mode - sticks: u64, // statclock hits in system mode - iticks: u64, // statclock hits processing intr - traceflag: i32, // kernel trace points - tracep: *const c_void, // trace to vnode - siglist: i32, // DEPRECATED - textvp: *const c_void, // vnode of executable - holdcnt: i32, // if non-zero, don't swap - sigmask: libc::sigset_t, // DEPRECATED - sigignore: libc::sigset_t, // signals being ignored - sigcatch: libc::sigset_t, // signals being caught by user - priority: u8, // process priority - usrpri: u8, // user-priority based on cpu and nice - nice: i8, // process "nice" value - comm: [i8; 16 /*MAXCOMLEN*/ + 1], - pgrp: *const c_void, // pointer to process group - addr: *const c_void, // kernel virtual addr of u-area (PROC ONLY) - xstat: u16, // exit status for wait; also stop signal - acflag: u16, // accounting flags - ru: *const c_void, // exit information -} - -#[repr(C)] -struct Pcred { - pc_lock: [i8; 72], // opaque content - pc_ucred: *const c_void, // current credentials - ruid: libc::uid_t, // real user id - svuid: libc::uid_t, // saved effective user id - rgid: libc::gid_t, // real group id - svgid: libc::gid_t, // saved effective group id - refcnt: i32, // number of references -} - -#[repr(C)] -struct Ucred { - refcnt: i32, // reference count - uid: libc::uid_t, // effective user id - ngroups: i16, // number of groups - groups: [libc::gid_t; 16], -} - -#[repr(C)] -struct EProc { - paddr: *const c_void, // address of proc - sess: *const c_void, // session pointer - pcred: Pcred, // process credentials - ucred: Ucred, // current credentials - vm: VmSpace, // address space - ppid: libc::pid_t, // parent process id - pgid: libc::gid_t, // process group id - jobc: i16, // job control counter - tdev: i32, // controlling tty dev - tpgid: libc::gid_t, // tty process group id - tsess: *const c_void, // tty session pointer - wmesg: [i8; 8], // wchan message - xsize: i32, // text size - xrssize: i16, // text rss - xccount: i16, // text references - xswrss: i16, - flag: i32, - login: [i8; 12], // short setlogin() name - spare: [i32; 4], -} - -#[repr(C)] -struct KInfoProc { - kp_proc: ExternProc, - kp_eproc: EProc, -} - impl MinidumpWriter { pub(crate) fn write_misc_info( &mut self, @@ -185,9 +82,13 @@ impl MinidumpWriter { // Note that Breakpad is using `getrusage` to get process times, but that // can only get resource usage for the current process and/or children, // but since we're (most likely) running in a different process than the - // one that has crashed, we instead use the same method that Crashpad - // uses to get the information for the actual crashed process which is - // far more interesting and relevant + // one that has crashed, we instead use `proc_pidinfo` which allows us to + // to retrieve the process time of the actual crashed process. Note that + // this is _also_ different from how Crashpad retrieves the process times, + // it uses sysctl with `CTL_KERN, KERN_PROC, KERN_PROC_PID`, however + // the structs that are filled out by that for this info are not available + // in libc, and frankly `proc_pidinfo` was better documented (well, relatively, + // all Apple documentation is terrible) // // SAFETY: syscall misc_info.process_create_time = unsafe { @@ -211,33 +112,6 @@ impl MinidumpWriter { } else { 0 } - - // let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_PID, pid]; - // let mut kinfo_proc = std::mem::MaybeUninit::::zeroed(); - // let mut len = std::mem::size_of::(); - - // if libc::sysctl( - // mib.as_mut_ptr().cast(), - // std::mem::size_of_val(&mib) as u32, - // kinfo_proc.as_mut_ptr().cast(), - // &mut len, - // ) != 0 - // { - // return Err(std::io::Error::last_os_error().into()); - // } - - // let kinfo_proc = kinfo_proc.assume_init(); - - // // This sysctl does not return an error if the pid was not found. 10.9.5 - // // xnu-2422.115.4/bsd/kern/kern_sysctl.c sysctl_prochandle() calls - // // xnu-2422.115.4/bsd/kern/kern_proc.c proc_iterate(), which provides no - // // indication of whether anything was done. To catch this, check that the PID - // // actually matches the one that we requested - // if kinfo_proc.kp_proc.p_pid != pid { - // 0 - // } else { - // kinfo_proc.kp_proc.starttime.tv_sec as u32 - // } }; // The basic task info keeps the timings for all of the terminated threads diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index c2b5e262..c81f7dfe 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -31,6 +31,10 @@ fn get_crash_reason<'a, T: std::ops::Deref + 'a>( fn dump_external_process() { use std::io::BufRead; + let approximate_proc_start_time = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); let mut child = start_child_and_return(""); let (task, thread) = { @@ -73,13 +77,6 @@ fn dump_external_process() { let md = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); - let _: MinidumpModuleList = md.get_stream().expect("Couldn't find MinidumpModuleList"); - let _: MinidumpThreadList = md.get_stream().expect("Couldn't find MinidumpThreadList"); - let _: MinidumpMemoryList = md.get_stream().expect("Couldn't find MinidumpMemoryList"); - let _: MinidumpSystemInfo = md.get_stream().expect("Couldn't find MinidumpSystemInfo"); - let _: MinidumpBreakpadInfo = md.get_stream().expect("Couldn't find MinidumpBreakpadInfo"); - let _: MinidumpMiscInfo = md.get_stream().expect("Couldn't find MinidumpMiscInfo"); - let crash_reason = get_crash_reason(&md); assert!(matches!( @@ -89,4 +86,36 @@ fn dump_external_process() { 100 ) )); + + let _: MinidumpModuleList = md.get_stream().expect("Couldn't find MinidumpModuleList"); + let _: MinidumpThreadList = md.get_stream().expect("Couldn't find MinidumpThreadList"); + let _: MinidumpMemoryList = md.get_stream().expect("Couldn't find MinidumpMemoryList"); + let _: MinidumpSystemInfo = md.get_stream().expect("Couldn't find MinidumpSystemInfo"); + let _: MinidumpBreakpadInfo = md.get_stream().expect("Couldn't find MinidumpBreakpadInfo"); + + let misc_info: MinidumpMiscInfo = md.get_stream().expect("Couldn't find MinidumpMiscInfo"); + + if let minidump::RawMiscInfo::MiscInfo2(mi) = &misc_info.raw { + // Unfortunately the minidump format only has 32-bit precision for the + // process start time + let process_create_time = mi.process_create_time as u64; + + assert!( + process_create_time >= approximate_proc_start_time + && process_create_time <= approximate_proc_start_time + 2 + ); + + assert!(mi.process_user_time > 0); + assert!(mi.process_kernel_time > 0); + + // These aren't currently available on aarch64, or if they are, they + // are not via the same sysctlbyname mechanism. Would be nice if Apple + // documented...anything + if cfg!(target_arch = "x86_64") { + assert!(mi.processor_max_mhz > 0); + assert!(mi.processor_current_mhz > 0); + } + } else { + panic!("unexpected misc info type {:?}", misc_info); + } } From 717312b783e7124dbe50fa86616c25f4c8dbafdb Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 09:42:51 +0200 Subject: [PATCH 14/53] Add debug prints --- src/mac/streams/misc_info.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 2dc49c78..f30a134f 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -92,23 +92,24 @@ impl MinidumpWriter { // // SAFETY: syscall misc_info.process_create_time = unsafe { - let pid = dumper.pid_for_task()?; + let pid = dbg!(dumper.pid_for_task())?; // Breakpad was using an old method to retrieve this, let's try the // BSD method instead which is already implemented in libc let mut proc_info = std::mem::MaybeUninit::::uninit(); - let size = std::mem::size_of::() as i32; - if libc::proc_pidinfo( - pid, - libc::PROC_PIDTBSDINFO, - 0, - proc_info.as_mut_ptr().cast(), - size, - ) == size - { + let size = dbg!(std::mem::size_of::() as i32); + if dbg!( + libc::proc_pidinfo( + pid, + libc::PROC_PIDTBSDINFO, + 0, + proc_info.as_mut_ptr().cast(), + size, + ) == size + ) { let proc_info = proc_info.assume_init(); - proc_info.pbi_start_tvsec as u32 + dbg!(proc_info.pbi_start_tvsec) as u32 } else { 0 } From 396b2ac384c9aa98aa229108dd54bac535354e16 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 09:58:31 +0200 Subject: [PATCH 15/53] Actually these were what I wanted --- src/mac/streams/misc_info.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index f30a134f..3e44e157 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -3,7 +3,7 @@ use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; use std::{ffi::c_void, time::Duration}; #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] struct TimeValue { seconds: i32, microseconds: i32, @@ -25,6 +25,7 @@ impl From for Duration { } #[repr(C)] +#[derive(Debug)] struct MachTaskBasicInfo { virtual_size: usize, // virtual memory size in bytes resident_size: usize, // resident memory size in bytes @@ -40,6 +41,7 @@ impl mach::TaskInfo for MachTaskBasicInfo { } #[repr(C)] +#[derive(Debug)] struct TaskThreadsTimeInfo { user_time: TimeValue, // total user run time for live threads system_time: TimeValue, // total system run time for live threads @@ -92,34 +94,33 @@ impl MinidumpWriter { // // SAFETY: syscall misc_info.process_create_time = unsafe { - let pid = dbg!(dumper.pid_for_task())?; + let pid = dumper.pid_for_task()?; // Breakpad was using an old method to retrieve this, let's try the // BSD method instead which is already implemented in libc let mut proc_info = std::mem::MaybeUninit::::uninit(); let size = dbg!(std::mem::size_of::() as i32); - if dbg!( - libc::proc_pidinfo( - pid, - libc::PROC_PIDTBSDINFO, - 0, - proc_info.as_mut_ptr().cast(), - size, - ) == size - ) { + if libc::proc_pidinfo( + pid, + libc::PROC_PIDTBSDINFO, + 0, + proc_info.as_mut_ptr().cast(), + size, + ) == size + { let proc_info = proc_info.assume_init(); - dbg!(proc_info.pbi_start_tvsec) as u32 + proc_info.pbi_start_tvsec as u32 } else { 0 } }; // The basic task info keeps the timings for all of the terminated threads - let basic_info = dumper.task_info::().ok(); + let basic_info = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads - let thread_times_info = dumper.task_info::().ok(); + let thread_times_info = dbg!(dumper.task_info::()).ok(); let user_time = basic_info .as_ref() From 915caa355082d0c3602cff674043f711745743b2 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 10:20:45 +0200 Subject: [PATCH 16/53] Use TASK_BASIC_INFO_64 instead --- src/mac/streams/misc_info.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 3e44e157..e6a4615c 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -26,18 +26,17 @@ impl From for Duration { #[repr(C)] #[derive(Debug)] -struct MachTaskBasicInfo { - virtual_size: usize, // virtual memory size in bytes - resident_size: usize, // resident memory size in bytes - resident_size_max: usize, // maximum resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads - suspend_count: i32, // suspend count for task +struct MachTaskBasicInfo64 { + suspend_count: i32, // suspend count for task + virtual_size: usize, // virtual memory size in bytes + resident_size: usize, // resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads } -impl mach::TaskInfo for MachTaskBasicInfo { - const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; +impl mach::TaskInfo for MachTaskBasicInfo64 { + const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; } #[repr(C)] @@ -117,7 +116,7 @@ impl MinidumpWriter { }; // The basic task info keeps the timings for all of the terminated threads - let basic_info = dbg!(dumper.task_info::()).ok(); + let basic_info = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads let thread_times_info = dbg!(dumper.task_info::()).ok(); From 22a5aad69998b529790f7ec8528cdc0d94be74e3 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 10:27:12 +0200 Subject: [PATCH 17/53] Add busy loop so we don't have 0 seconds of user time --- src/bin/test.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/bin/test.rs b/src/bin/test.rs index 802b7303..ad474e34 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -351,6 +351,12 @@ mod mac { let task = mach2::traps::mach_task_self(); let thread = mach2::mach_init::mach_thread_self(); + // Busy loop for 1 second just so we accrue user thread time + let start = std::time::Instant::now(); + while (std::time::Instant::now() - start).as_secs() < 1 { + eprint!("."); + } + println!("{task} {thread}"); // Wait until we're killed From 589bf61cddb137a42f1c0ec04b0eeeba4f56a24c Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 10:32:40 +0200 Subject: [PATCH 18/53] uhm --- src/mac/streams/misc_info.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index e6a4615c..3e44e157 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -26,17 +26,18 @@ impl From for Duration { #[repr(C)] #[derive(Debug)] -struct MachTaskBasicInfo64 { - suspend_count: i32, // suspend count for task - virtual_size: usize, // virtual memory size in bytes - resident_size: usize, // resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads +struct MachTaskBasicInfo { + virtual_size: usize, // virtual memory size in bytes + resident_size: usize, // resident memory size in bytes + resident_size_max: usize, // maximum resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads + suspend_count: i32, // suspend count for task } -impl mach::TaskInfo for MachTaskBasicInfo64 { - const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; +impl mach::TaskInfo for MachTaskBasicInfo { + const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } #[repr(C)] @@ -116,7 +117,7 @@ impl MinidumpWriter { }; // The basic task info keeps the timings for all of the terminated threads - let basic_info = dbg!(dumper.task_info::()).ok(); + let basic_info = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads let thread_times_info = dbg!(dumper.task_info::()).ok(); From 05b10df2000fa03a3765dd3a31721d63e894f487 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 10:38:53 +0200 Subject: [PATCH 19/53] Check both --- src/mac/streams/misc_info.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 3e44e157..81ca2797 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -36,10 +36,25 @@ struct MachTaskBasicInfo { suspend_count: i32, // suspend count for task } -impl mach::TaskInfo for MachTaskBasicInfo { +impl mach::TaskInfo for TaskBasicInfo64 { const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } +#[repr(C)] +#[derive(Debug)] +struct TaskBasicInfo64 { + suspend_count: i32, // suspend count for task + virtual_size: usize, // virtual memory size in bytes + resident_size: usize, // resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads +} + +impl mach::TaskInfo for MachTaskBasicInfo { + const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; +} + #[repr(C)] #[derive(Debug)] struct TaskThreadsTimeInfo { @@ -118,6 +133,7 @@ impl MinidumpWriter { // The basic task info keeps the timings for all of the terminated threads let basic_info = dbg!(dumper.task_info::()).ok(); + let _basic_info64 = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads let thread_times_info = dbg!(dumper.task_info::()).ok(); From 13edc955168155e5388776d5bf4edb106cd0815a Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 10:56:10 +0200 Subject: [PATCH 20/53] Properly pack structs --- src/mac/streams/misc_info.rs | 38 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 81ca2797..52f9840d 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -24,28 +24,28 @@ impl From for Duration { } } -#[repr(C)] +#[repr(C, packed(4))] #[derive(Debug)] struct MachTaskBasicInfo { - virtual_size: usize, // virtual memory size in bytes - resident_size: usize, // resident memory size in bytes - resident_size_max: usize, // maximum resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads - suspend_count: i32, // suspend count for task + virtual_size: u64, // virtual memory size in bytes + resident_size: u64, // resident memory size in bytes + resident_size_max: u64, // maximum resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads + suspend_count: i32, // suspend count for task } impl mach::TaskInfo for TaskBasicInfo64 { const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } -#[repr(C)] +#[repr(C, packed(4))] #[derive(Debug)] struct TaskBasicInfo64 { suspend_count: i32, // suspend count for task - virtual_size: usize, // virtual memory size in bytes - resident_size: usize, // resident memory size in bytes + virtual_size: u64, // virtual memory size in bytes + resident_size: u64, // resident memory size in bytes user_time: TimeValue, // total user run time for terminated threads system_time: TimeValue, // total system run time for terminated threads policy: i32, // default policy for new threads @@ -55,6 +55,21 @@ impl mach::TaskInfo for MachTaskBasicInfo { const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; } +#[repr(C, packed(4))] +#[derive(Debug)] +struct TaskBasicInfo { + suspend_count: i32, // suspend count for task + virtual_size: u64, // virtual memory size in bytes + resident_size: u64, // resident memory size in bytes + user_time: TimeValue, // total user run time for terminated threads + system_time: TimeValue, // total system run time for terminated threads + policy: i32, // default policy for new threads +} + +impl mach::TaskInfo for TaskBasicInfo { + const FLAVOR: u32 = 5; //mach::task_info::TASK_BASIC_INFO; +} + #[repr(C)] #[derive(Debug)] struct TaskThreadsTimeInfo { @@ -134,6 +149,7 @@ impl MinidumpWriter { // The basic task info keeps the timings for all of the terminated threads let basic_info = dbg!(dumper.task_info::()).ok(); let _basic_info64 = dbg!(dumper.task_info::()).ok(); + let _task_basic_info = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads let thread_times_info = dbg!(dumper.task_info::()).ok(); From aa7bcc474d147c2fd522e987960c9ef22d09cde4 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 11:17:00 +0200 Subject: [PATCH 21/53] oops --- src/mac/streams/misc_info.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 52f9840d..40b72883 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -36,7 +36,7 @@ struct MachTaskBasicInfo { suspend_count: i32, // suspend count for task } -impl mach::TaskInfo for TaskBasicInfo64 { +impl mach::TaskInfo for MachTaskBasicInfo { const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } @@ -51,7 +51,7 @@ struct TaskBasicInfo64 { policy: i32, // default policy for new threads } -impl mach::TaskInfo for MachTaskBasicInfo { +impl mach::TaskInfo for TaskBasicInfo64 { const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; } @@ -70,7 +70,7 @@ impl mach::TaskInfo for TaskBasicInfo { const FLAVOR: u32 = 5; //mach::task_info::TASK_BASIC_INFO; } -#[repr(C)] +#[repr(C, packed(4))] #[derive(Debug)] struct TaskThreadsTimeInfo { user_time: TimeValue, // total user run time for live threads From a2010cdbd5041d4e66643036fdfcf5dbbf7d50e9 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 11:58:42 +0200 Subject: [PATCH 22/53] Use a thread to force timings? --- src/bin/test.rs | 12 +++++--- src/mac/streams/misc_info.rs | 53 ++++++++---------------------------- 2 files changed, 19 insertions(+), 46 deletions(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index ad474e34..5d6c12bc 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -352,10 +352,14 @@ mod mac { let thread = mach2::mach_init::mach_thread_self(); // Busy loop for 1 second just so we accrue user thread time - let start = std::time::Instant::now(); - while (std::time::Instant::now() - start).as_secs() < 1 { - eprint!("."); - } + std::thread::spawn(move || { + let start = std::time::Instant::now(); + while (std::time::Instant::now() - start).as_secs() < 1 { + eprint!("."); + } + }) + .join() + .unwrap(); println!("{task} {thread}"); diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 40b72883..2e2ceb09 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -40,36 +40,6 @@ impl mach::TaskInfo for MachTaskBasicInfo { const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } -#[repr(C, packed(4))] -#[derive(Debug)] -struct TaskBasicInfo64 { - suspend_count: i32, // suspend count for task - virtual_size: u64, // virtual memory size in bytes - resident_size: u64, // resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads -} - -impl mach::TaskInfo for TaskBasicInfo64 { - const FLAVOR: u32 = mach::task_info::TASK_BASIC_INFO_64; -} - -#[repr(C, packed(4))] -#[derive(Debug)] -struct TaskBasicInfo { - suspend_count: i32, // suspend count for task - virtual_size: u64, // virtual memory size in bytes - resident_size: u64, // resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads -} - -impl mach::TaskInfo for TaskBasicInfo { - const FLAVOR: u32 = 5; //mach::task_info::TASK_BASIC_INFO; -} - #[repr(C, packed(4))] #[derive(Debug)] struct TaskThreadsTimeInfo { @@ -111,16 +81,10 @@ impl MinidumpWriter { processor_current_idle_state: 0, }; - // Note that Breakpad is using `getrusage` to get process times, but that - // can only get resource usage for the current process and/or children, - // but since we're (most likely) running in a different process than the - // one that has crashed, we instead use `proc_pidinfo` which allows us to - // to retrieve the process time of the actual crashed process. Note that - // this is _also_ different from how Crashpad retrieves the process times, - // it uses sysctl with `CTL_KERN, KERN_PROC, KERN_PROC_PID`, however - // the structs that are filled out by that for this info are not available - // in libc, and frankly `proc_pidinfo` was better documented (well, relatively, - // all Apple documentation is terrible) + // Note that both Breakpad and Crashpad use `sysctl CTL_KERN, KERN_PROC, KERN_PROC_PID` + // to retrieve the process start time, but none of the structures that + // are filled in by that call are in libc at the moment, and `proc_pidinfo` + // seems to work just fine, so using that instead. // // SAFETY: syscall misc_info.process_create_time = unsafe { @@ -146,10 +110,15 @@ impl MinidumpWriter { } }; + // Note that Breakpad is using `getrusage` to retrieve this information, + // however that is wrong, as it can only retrieve the process usage information + // for the current or children processes, not an external process, so + // we use the Crashpad method, which is itself based off of the XNU + // method of retrieving the process times + // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/kern/kern_resource.c#L1215 + // The basic task info keeps the timings for all of the terminated threads let basic_info = dbg!(dumper.task_info::()).ok(); - let _basic_info64 = dbg!(dumper.task_info::()).ok(); - let _task_basic_info = dbg!(dumper.task_info::()).ok(); // THe thread times info keeps the timings for all of the living threads let thread_times_info = dbg!(dumper.task_info::()).ok(); From 97ad56c08651160472614c2e42f43cf72f35ca4b Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 12:01:36 +0200 Subject: [PATCH 23/53] Try harder to use user cpu time --- src/bin/test.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index 5d6c12bc..c64fb947 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -351,15 +351,24 @@ mod mac { let task = mach2::traps::mach_task_self(); let thread = mach2::mach_init::mach_thread_self(); - // Busy loop for 1 second just so we accrue user thread time - std::thread::spawn(move || { + fn count(seconds: u64) { + // Busy loop for 1 second just so we accrue user thread time let start = std::time::Instant::now(); - while (std::time::Instant::now() - start).as_secs() < 1 { - eprint!("."); + let mut counter = 0; + while (std::time::Instant::now() - start).as_secs() < seconds { + counter += 1; + std::thread::sleep(std::time::Duration::from_millis(10)); } - }) - .join() - .unwrap(); + + eprintln!("{:?} counted to {}", std::thread::current().id(), counter); + } + + // Start some threads + let threads = (0..20).map(|_| std::thread::spawn(move || count(1))); + + for thread in threads { + thread.join().unwrap(); + } println!("{task} {thread}"); From 3c36479f0bd6d9a9488ebca3f57635895ede3086 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 20 Apr 2022 12:16:29 +0200 Subject: [PATCH 24/53] Just give up on process times for now --- src/bin/test.rs | 19 ------------------- src/mac/streams/misc_info.rs | 4 ++-- tests/mac_minidump_writer.rs | 12 ++++++++++-- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index c64fb947..802b7303 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -351,25 +351,6 @@ mod mac { let task = mach2::traps::mach_task_self(); let thread = mach2::mach_init::mach_thread_self(); - fn count(seconds: u64) { - // Busy loop for 1 second just so we accrue user thread time - let start = std::time::Instant::now(); - let mut counter = 0; - while (std::time::Instant::now() - start).as_secs() < seconds { - counter += 1; - std::thread::sleep(std::time::Duration::from_millis(10)); - } - - eprintln!("{:?} counted to {}", std::thread::current().id(), counter); - } - - // Start some threads - let threads = (0..20).map(|_| std::thread::spawn(move || count(1))); - - for thread in threads { - thread.join().unwrap(); - } - println!("{task} {thread}"); // Wait until we're killed diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 2e2ceb09..2b5b78c4 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -118,10 +118,10 @@ impl MinidumpWriter { // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/kern/kern_resource.c#L1215 // The basic task info keeps the timings for all of the terminated threads - let basic_info = dbg!(dumper.task_info::()).ok(); + let basic_info = dumper.task_info::().ok(); // THe thread times info keeps the timings for all of the living threads - let thread_times_info = dbg!(dumper.task_info::()).ok(); + let thread_times_info = dumper.task_info::().ok(); let user_time = basic_info .as_ref() diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index c81f7dfe..11a9d634 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -105,8 +105,16 @@ fn dump_external_process() { && process_create_time <= approximate_proc_start_time + 2 ); - assert!(mi.process_user_time > 0); - assert!(mi.process_kernel_time > 0); + // I've tried busy looping to spend CPU time to get this up, but + // MACH_TASK_BASIC_INFO which should give terminated thread times only ever + // reports 0, and TASK_THREAD_TIMES_INFO which should show active thread + // times I've only been able to get upt to a few thousand microseconds + // even when busy looping for well over a second, and those get truncated + // to whole seconds. And it seems that crashpad doesn't have tests around + // this, though that's hard to say given how tedious it is finding stuff + // in that bloated codebase + // assert!(mi.process_user_time > 0); + // assert!(mi.process_kernel_time > 0); // These aren't currently available on aarch64, or if they are, they // are not via the same sysctlbyname mechanism. Would be nice if Apple From 1695497782fcc32765e1b430625d59d5b1fa5aaf Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 21 Apr 2022 07:22:47 +0200 Subject: [PATCH 25/53] Cleanup/docs pass --- src/mac/mach.rs | 136 +++++++++++++++++++------------ src/mac/streams/breakpad_info.rs | 6 ++ src/mac/streams/exception.rs | 4 + src/mac/streams/memory_list.rs | 7 +- src/mac/streams/misc_info.rs | 48 +++++++---- src/mac/streams/module_list.rs | 7 ++ src/mac/streams/system_info.rs | 5 ++ src/mac/streams/thread_list.rs | 5 ++ src/mac/task_dumper.rs | 89 ++++++++++++++------ src/mem_writer.rs | 1 + src/minidump_cpu.rs | 2 + 11 files changed, 217 insertions(+), 93 deletions(-) diff --git a/src/mac/mach.rs b/src/mac/mach.rs index de5031fb..cfaa339e 100644 --- a/src/mac/mach.rs +++ b/src/mac/mach.rs @@ -291,7 +291,7 @@ pub trait TaskInfo { } // usr/include/mach-o/loader.h, the file type for the main executable image -const MH_EXECUTE: u32 = 0x2; +//const MH_EXECUTE: u32 = 0x2; // usr/include/mach-o/loader.h, magic number for MachHeader pub const MH_MAGIC_64: u32 = 0xfeedfacf; // usr/include/mach-o/loader.h, command to map a segment @@ -301,77 +301,98 @@ pub const LC_ID_DYLIB: u32 = 0xd; // usr/include/mach-o/loader.h, the uuid pub const LC_UUID: u32 = 0x1b; -// usr/include/mach-o/loader.h +/// The header at the beginning of every (valid) Mach image +/// +/// #[repr(C)] #[derive(Clone)] pub struct MachHeader { - pub magic: u32, // mach magic number identifier - pub cpu_type: i32, // cpu_type_t cpu specifier - pub cpu_sub_type: i32, // cpu_subtype_t machine specifier - pub file_type: u32, // type of file - pub num_commands: u32, // number of load commands - pub size_commands: u32, // size of all the load commands + /// Mach magic number identifier, this is used to validate the header is valid + pub magic: u32, + /// `cpu_type_t` cpu specifier + pub cpu_type: i32, + /// `cpu_subtype_t` machine specifier + pub cpu_sub_type: i32, + /// Type of file, eg. [`MH_EXECUTE`] for the main executable + pub file_type: u32, + /// Number of load commands for the image + pub num_commands: u32, + /// Size in bytes of all of the load commands + pub size_commands: u32, pub flags: u32, __reserved: u32, } -// usr/include/mach-o/loader.h +/// Every load command is a variable sized struct depending on its type, but +/// they all include the fields in this struct at the beginning +/// +/// #[repr(C)] pub struct LoadCommandBase { - pub cmd: u32, // type of load command - pub cmd_size: u32, // total size of the command in bytes + /// Type of load command `LC_*` + pub cmd: u32, + /// Total size of the command in bytes + pub cmd_size: u32, } -/* - * The 64-bit segment load command indicates that a part of this file is to be - * mapped into a 64-bit task's address space. If the 64-bit segment has - * sections then section_64 structures directly follow the 64-bit segment - * command and their size is reflected in cmdsize. - */ +/// The 64-bit segment load command indicates that a part of this file is to be +/// mapped into a 64-bit task's address space. If the 64-bit segment has +/// sections then section_64 structures directly follow the 64-bit segment +/// command and their size is reflected in `cmdsize`. #[repr(C)] pub struct SegmentCommand64 { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes - pub segment_name: [u8; 16], // string name of the section - pub vm_addr: u64, // memory address the segment is mapped to - pub vm_size: u64, // total size of the segment - pub file_off: u64, // file offset of the segment - pub file_size: u64, // amount mapped from the file - pub max_prot: i32, // maximum VM protection - pub init_prot: i32, // initial VM protection - pub num_sections: u32, // number of sections in the segment + cmd: u32, + pub cmd_size: u32, + /// String name of the section + pub segment_name: [u8; 16], + /// Memory address the segment is mapped to + pub vm_addr: u64, + /// Total size of the segment + pub vm_size: u64, + /// File offset of the segment + pub file_off: u64, + /// Amount mapped from the file + pub file_size: u64, + /// Maximum VM protection + pub max_prot: i32, + /// Initial VM protection + pub init_prot: i32, + /// Number of sections in the segment + pub num_sections: u32, pub flags: u32, } -/* - * Dynamically linked shared libraries are identified by two things. The - * pathname (the name of the library as found for execution), and the - * compatibility version number. The pathname must match and the compatibility - * number in the user of the library must be greater than or equal to the - * library being used. The time stamp is used to record the time a library was - * built and copied into user so it can be use to determined if the library used - * at runtime is exactly the same as used to built the program. - */ +/// Dynamically linked shared libraries are identified by two things. The +/// pathname (the name of the library as found for execution), and the +/// compatibility version number. The pathname must match and the compatibility +/// number in the user of the library must be greater than or equal to the +/// library being used. The time stamp is used to record the time a library was +/// built and copied into user so it can be use to determined if the library used +/// at runtime is exactly the same as used to built the program. #[repr(C)] pub struct Dylib { - pub name: u32, // offset from the load command start to the pathname - pub timestamp: u32, // library's build time stamp - pub current_version: u32, // library's current version number - pub compatibility_version: u32, // library's compatibility vers number + /// Offset from the load command start to the pathname + pub name: u32, + /// Library's build time stamp + pub timestamp: u32, + /// Library's current version number + pub current_version: u32, + /// Library's compatibility version number + pub compatibility_version: u32, } -/* - * A dynamically linked shared library (filetype == MH_DYLIB in the mach header) - * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. - * An object that uses a dynamically linked shared library also contains a - * dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or - * LC_REEXPORT_DYLIB) for each library it uses. - */ +/// A dynamically linked shared library (filetype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. #[repr(C)] pub struct DylibCommand { - cmd: u32, // type of load command - cmd_size: u32, // total size of the command in bytes, including pathname string - pub dylib: Dylib, // library identification + cmd: u32, + /// Total size of the command in bytes, including pathname string + pub cmd_size: u32, + /// Library identification + pub dylib: Dylib, } /// The uuid load command contains a single 128-bit unique random number that @@ -379,7 +400,8 @@ pub struct DylibCommand { #[repr(C)] pub struct UuidCommand { cmd: u32, - cmd_size: u32, + pub cmd_size: u32, + /// The UUID. The components are in big-endian regardless of the host architecture pub uuid: [u8; 16], } @@ -392,6 +414,7 @@ pub struct LoadCommands { } impl LoadCommands { + /// Retrieves an iterator over the load commands in the contained buffer #[inline] pub fn iter(&self) -> LoadCommandsIter<'_> { LoadCommandsIter { @@ -401,6 +424,7 @@ impl LoadCommands { } } +/// A single load command pub enum LoadCommand<'buf> { Segment(&'buf SegmentCommand64), Dylib(&'buf DylibCommand), @@ -534,3 +558,13 @@ pub fn sysctl_string(name: &[u8]) -> String { String::from_utf8(string_buf).unwrap_or_default() } + +extern "C" { + /// From , this retrieves the normal PID for + /// the specified task as the syscalls from BSD use PIDs, not mach ports. + /// + /// This seems to be marked as "obsolete" in the header, but of course being + /// Apple, there is no mention of a replacement function or when/if it might + /// eventually disappear. + pub fn pid_for_task(task: mach_port_name_t, pid: *mut i32) -> kern_return_t; +} diff --git a/src/mac/streams/breakpad_info.rs b/src/mac/streams/breakpad_info.rs index 79188998..d8f1e587 100644 --- a/src/mac/streams/breakpad_info.rs +++ b/src/mac/streams/breakpad_info.rs @@ -2,6 +2,12 @@ use super::*; use format::{BreakpadInfoValid, MINIDUMP_BREAKPAD_INFO as BreakpadInfo}; impl MinidumpWriter { + /// Writes the [`BreakpadInfo`] stream. + /// + /// For MacOS the primary use of this stream is to differentiate between + /// the thread that actually raised an exception, and the thread on which + /// the exception port was listening, so that the exception port (handler) + /// thread can be deprioritized/ignored when analyzing the minidump. pub(crate) fn write_breakpad_info( &mut self, buffer: &mut DumpBuf, diff --git a/src/mac/streams/exception.rs b/src/mac/streams/exception.rs index ab7507e9..69aabc1a 100644 --- a/src/mac/streams/exception.rs +++ b/src/mac/streams/exception.rs @@ -1,6 +1,10 @@ use super::*; impl MinidumpWriter { + /// Writes the [`minidump_common::format::MINIDUMP_EXCEPTION_STREAM`] stream. + /// + /// This stream is optional on MacOS as a user requested minidump could + /// choose not to specify the exception information. pub(crate) fn write_exception( &mut self, buffer: &mut DumpBuf, diff --git a/src/mac/streams/memory_list.rs b/src/mac/streams/memory_list.rs index 240b7ce5..9ead2027 100644 --- a/src/mac/streams/memory_list.rs +++ b/src/mac/streams/memory_list.rs @@ -1,6 +1,9 @@ use super::*; impl MinidumpWriter { + /// Writes the [`MDStreamType::MemoryListStream`]. The memory blocks that are + /// written into this stream are the raw thread contexts that were retrieved + /// and added by [`Self::write_thread_list`] pub(crate) fn write_memory_list( &mut self, buffer: &mut DumpBuf, @@ -8,9 +11,9 @@ impl MinidumpWriter { ) -> Result { // Include some memory around the instruction pointer if the crash was // due to an exception - const IP_MEM_SIZE: u64 = 256; - if self.crash_context.exception.is_some() { + const IP_MEM_SIZE: u64 = 256; + let get_ip_block = |tid| -> Option> { let thread_state = dumper.read_thread_state(tid).ok()?; diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 2b5b78c4..938bb2ab 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -1,9 +1,10 @@ use super::*; use format::{MiscInfoFlags, MINIDUMP_MISC_INFO_2 as MDRawMiscInfo}; -use std::{ffi::c_void, time::Duration}; +use std::time::Duration; +/// From #[repr(C)] -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone)] struct TimeValue { seconds: i32, microseconds: i32, @@ -24,27 +25,38 @@ impl From for Duration { } } +/// From , this includes basic information about +/// a task. #[repr(C, packed(4))] -#[derive(Debug)] struct MachTaskBasicInfo { - virtual_size: u64, // virtual memory size in bytes - resident_size: u64, // resident memory size in bytes - resident_size_max: u64, // maximum resident memory size in bytes - user_time: TimeValue, // total user run time for terminated threads - system_time: TimeValue, // total system run time for terminated threads - policy: i32, // default policy for new threads - suspend_count: i32, // suspend count for task + /// Virtual memory size in bytes + virtual_size: u64, + /// Resident memory size in bytes + resident_size: u64, + /// Maximum resident memory size in bytes + resident_size_max: u64, + /// Total user run time for terminated threads + user_time: TimeValue, + /// Total system run time for terminated threads + system_time: TimeValue, + /// Default policy for new threads + policy: i32, + /// Suspend count for task + suspend_count: i32, } impl mach::TaskInfo for MachTaskBasicInfo { const FLAVOR: u32 = mach::task_info::MACH_TASK_BASIC_INFO; } +/// From , this includes times for currently +/// live threads in the task. #[repr(C, packed(4))] -#[derive(Debug)] struct TaskThreadsTimeInfo { - user_time: TimeValue, // total user run time for live threads - system_time: TimeValue, // total system run time for live threads + /// Total user run time for live threads + user_time: TimeValue, + /// total system run time for live threads + system_time: TimeValue, } impl mach::TaskInfo for TaskThreadsTimeInfo { @@ -52,6 +64,14 @@ impl mach::TaskInfo for TaskThreadsTimeInfo { } impl MinidumpWriter { + /// Writes the [`MDStreamType::MiscInfoStream`] stream. + /// + /// On MacOS, we write a [`minidump_common::format::MINIDUMP_MISC_INFO_2`] + /// to this stream, which includes the start time of the process at second + /// granularity, and the (approximate) amount of time spent in user and + /// system (kernel) time for the lifetime of the task. We attempt to also + /// retrieve power ie CPU usage statistics, though this information is only + /// currently available on x86_64, not aarch64 at the moment. pub(crate) fn write_misc_info( &mut self, buffer: &mut DumpBuf, @@ -88,8 +108,6 @@ impl MinidumpWriter { // // SAFETY: syscall misc_info.process_create_time = unsafe { - let pid = dumper.pid_for_task()?; - // Breakpad was using an old method to retrieve this, let's try the // BSD method instead which is already implemented in libc let mut proc_info = std::mem::MaybeUninit::::uninit(); diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index 7d13a6a2..a2d0f1dc 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -1,6 +1,13 @@ use super::*; impl MinidumpWriter { + /// Writes the [`MDStreamType::ModuleListStream`] to the minidump, which is + /// the last of all loaded modules (images) in the process. + /// + /// Notably, this includes the UUID of the image which is needed to look up + /// debug symbols for the module, as well as the address range covered by + /// the module to know which debug symbols are used to resolve which instruction + /// addresses pub(crate) fn write_module_list( &mut self, buffer: &mut DumpBuf, diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs index 0fbbc030..aef90bbd 100644 --- a/src/mac/streams/system_info.rs +++ b/src/mac/streams/system_info.rs @@ -125,6 +125,11 @@ fn read_cpu_info(cpu: &mut format::CPU_INFORMATION) { } impl MinidumpWriter { + /// Writes the [`MDStreamType::SystemInfoStream`] stream. + /// + /// On MacOS we includes basic CPU information, though some of it is not + /// available on `aarch64` at the time of this writing, as well as kernel + /// version information. pub(crate) fn write_system_info( &mut self, buffer: &mut DumpBuf, diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index 5fd2c771..ff6e41c6 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -2,6 +2,8 @@ use super::*; use crate::minidump_cpu::RawContextCPU; impl MinidumpWriter { + /// Writes the [`MDStreamType::ThreadListStream`] which is an array of + /// [`miniduimp_common::format::MINIDUMP_THREAD`] pub(crate) fn write_thread_list( &mut self, buffer: &mut DumpBuf, @@ -103,6 +105,9 @@ impl MinidumpWriter { }; thread.stack.memory = stack_location; + + // Add the stack memory as a raw block of memory, this is written to + // the minidump as part of the memory list stream self.memory_blocks.push(thread.stack); Ok(()) } diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs index cd7071ed..a23ea7dd 100644 --- a/src/mac/task_dumper.rs +++ b/src/mac/task_dumper.rs @@ -40,12 +40,28 @@ macro_rules! mach_call { }}; } -// dyld_image_info +/// `dyld_all_image_infos` from +/// +/// This struct is truncated as we only need a couple of fields at the beginning +/// of the struct +#[repr(C)] +struct AllImagesInfo { + version: u32, // == 1 in Mac OS X 10.4 + /// The number of [`ImageInfo`] structs at that following address + info_array_count: u32, + /// The address in the process where the array of [`ImageInfo`] structs is + info_array_addr: u64, +} + +/// `dyld_image_info` from #[repr(C)] #[derive(Clone)] pub struct ImageInfo { + /// The address in the process where the image is loaded pub load_address: u64, + /// The address in the process where the image's file path can be read pub file_path: u64, + /// Timestamp for when the image's file was last modified pub file_mod_date: u64, } @@ -93,6 +109,10 @@ impl TaskDumper { } /// Reads a block of memory from the task + /// + /// # Errors + /// + /// The syscall to read the task's memory fails for some reason, eg bad address. pub fn read_task_memory(&self, address: u64, count: usize) -> Result, TaskDumpError> where T: Sized + Clone, @@ -191,14 +211,19 @@ impl TaskDumper { } /// Retrives information on the virtual memory region the specified address - /// is located within + /// is located within. + /// + /// # Errors + /// + /// The syscall to retrieve the VM region information fails for some reason, + /// eg. a bad address. pub fn get_vm_region(&self, addr: u64) -> Result { let mut region_base = addr; let mut region_size = 0; let mut nesting_level = 0; let mut submap_info = std::mem::MaybeUninit::::uninit(); - // mach/vm_region.h + // const VM_REGION_SUBMAP_INFO_COUNT_64: u32 = (std::mem::size_of::() / std::mem::size_of::()) as u32; @@ -221,8 +246,13 @@ impl TaskDumper { }) } - /// Retrieves the state of the specified thread. The state is is an architecture + /// Retrieves the state of the specified thread. The state is an architecture /// specific block of CPU context ie register state. + /// + /// # Errors + /// + /// The specified thread id is invalid, or the thread is in a task that is + /// compiled for a different architecture than this local task. pub fn read_thread_state(&self, tid: u32) -> Result { let mut thread_state = mach::ThreadState::default(); @@ -236,7 +266,12 @@ impl TaskDumper { Ok(thread_state) } - /// Reads the specified task information + /// Reads the specified task information. + /// + /// # Errors + /// + /// The syscall to receive the task information failed for some reason, eg. + /// the specified type and the flavor are mismatched and considered invalid. pub fn task_info(&self) -> Result { let mut info = std::mem::MaybeUninit::::uninit(); let mut count = (std::mem::size_of::() / std::mem::size_of::()) as u32; @@ -252,8 +287,14 @@ impl TaskDumper { unsafe { Ok(info.assume_init()) } } - /// Retrieves all of the images loaded in the task. Note that there may be - /// multiple images with the same load address. + /// Retrieves all of the images loaded in the task. + /// + /// Note that there may be multiple images with the same load address. + /// + /// # Errors + /// + /// The syscall to retrieve the location of the loaded images fails, or + /// the syscall to read the loaded images from the process memory fails pub fn read_images(&self) -> Result, TaskDumpError> { impl mach::TaskInfo for mach::task_info::task_dyld_info { const FLAVOR: u32 = mach::task_info::TASK_DYLD_INFO; @@ -266,15 +307,6 @@ impl TaskDumper { dyld_info.all_image_info_addr }; - // dyld_all_image_infos defined in usr/include/mach-o/dyld_images.h, we - // only need a couple of fields at the beginning - #[repr(C)] - struct AllImagesInfo { - version: u32, // == 1 in Mac OS X 10.4 - info_array_count: u32, - info_array_addr: u64, - } - // Here we make the assumption that dyld loaded at the same address in // the crashed process vs. this one. This is an assumption made in // "dyld_debug.c" and is said to be nearly always valid. @@ -290,6 +322,12 @@ impl TaskDumper { } /// Retrieves the load commands for the specified image + /// + /// # Errors + /// + /// We fail to read the image header for the specified image, the header we + /// read is determined to be invalid, or we fail to read the block of memory + /// containing the load commands themselves. pub fn read_load_commands(&self, img: &ImageInfo) -> Result { let mach_header = self.read_task_memory::(img.load_address, 1)?; @@ -315,6 +353,10 @@ impl TaskDumper { } /// Gets a list of all of the thread ids in the task + /// + /// # Errors + /// + /// The syscall to retrieve the list of threads fails pub fn read_threads(&self) -> Result<&'static [u32], TaskDumpError> { let mut threads = std::ptr::null_mut(); let mut thread_count = 0; @@ -332,17 +374,14 @@ impl TaskDumper { } /// Retrieves the PID for the task + /// + /// # Errors + /// + /// Presumably the only way this would fail would be if the task we are + /// dumping disappears. pub fn pid_for_task(&self) -> Result { - extern "C" { - /// /usr/include/mach/mach_traps.h - /// - /// This seems to be marked as "obsolete" so might disappear at some point? - fn pid_for_task(task: mach::mach_port_name_t, pid: *mut i32) -> mach::kern_return_t; - } - let mut pid = 0; - mach_call!(pid_for_task(self.task, &mut pid))?; - + mach_call!(mach::pid_for_task(self.task, &mut pid))?; Ok(pid) } } diff --git a/src/mem_writer.rs b/src/mem_writer.rs index 5b5b6298..a723c2aa 100644 --- a/src/mem_writer.rs +++ b/src/mem_writer.rs @@ -146,6 +146,7 @@ pub struct MemoryArrayWriter { phantom: std::marker::PhantomData, } +#[cfg(any(target_os = "linux", target_os = "android"))] impl MemoryArrayWriter { #[inline] pub fn write_bytes(buffer: &mut Buffer, slice: &[u8]) -> Self { diff --git a/src/minidump_cpu.rs b/src/minidump_cpu.rs index 4130939e..6afc9402 100644 --- a/src/minidump_cpu.rs +++ b/src/minidump_cpu.rs @@ -11,8 +11,10 @@ cfg_if::cfg_if! { } else if #[cfg(target_arch = "aarch64")] { /// This is the number of general purpose registers _not_ counting /// the stack pointer + #[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) const GP_REG_COUNT: usize = 31; /// The number of floating point registers in the floating point save area + #[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) const FP_REG_COUNT: usize = 32; pub type RawContextCPU = minidump_common::format::CONTEXT_ARM64_OLD; From 3e473b0a9fc725f242a4d9c725b6ee61195e7046 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 21 Apr 2022 07:23:12 +0200 Subject: [PATCH 26/53] Add task_dumper test for load command iteration --- Cargo.toml | 4 ++ src/mac.rs | 4 +- tests/task_dumper.rs | 119 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 tests/task_dumper.rs diff --git a/Cargo.toml b/Cargo.toml index ea98e7a9..ec8ded6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,10 @@ mach2 = "0.4" [dev-dependencies] minidump = "0.10" +[target.'cfg(target_os = "macos")'.dev-dependencies] +similar-asserts = "1.2" +uuid = "1.0" + [patch.crates-io] minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "macos" } diff --git a/src/mac.rs b/src/mac.rs index 4f8edd0d..3e7fd59d 100644 --- a/src/mac.rs +++ b/src/mac.rs @@ -2,7 +2,7 @@ compile_error!("Various MacOS FFI bindings assume we are on a 64-bit architechture"); pub mod errors; -mod mach; +pub mod mach; pub mod minidump_writer; mod streams; -mod task_dumper; +pub mod task_dumper; diff --git a/tests/task_dumper.rs b/tests/task_dumper.rs new file mode 100644 index 00000000..806fed9e --- /dev/null +++ b/tests/task_dumper.rs @@ -0,0 +1,119 @@ +//! All of these tests are specific to the MacOS task dumper +#![cfg(target_os = "macos")] + +use minidump_writer::{mach::LoadCommand, task_dumper::TaskDumper}; +use std::fmt::Write; + +fn call_otool(args: &[&str]) -> String { + let mut cmd = std::process::Command::new("otool"); + cmd.args(args); + + let exe_path = std::env::current_exe().expect("unable to retrieve test executable path"); + cmd.arg(exe_path); + + let output = cmd.output().expect("failed to spawn otool"); + + assert!(output.status.success()); + + String::from_utf8(output.stdout).expect("stdout was invalid utf-8") +} + +/// Validates we can iterate the load commands for all of the images in the task +#[test] +fn iterates_load_commands() { + let lc_str = call_otool(&["-l"]); + + let mut expected = String::new(); + let mut lc_index = 0; + + while let Some(nlc) = lc_str[lc_index..].find("Load command ") { + lc_index += nlc; + + let block = match lc_str[lc_index + 13..].find("Load command ") { + Some(ind) => &lc_str[lc_index + 13..lc_index + 13 + ind], + None => &lc_str[lc_index..], + }; + + let cmd = block + .find("cmd ") + .expect("load commnd didn't specify cmd kind"); + let cmd_end = block[cmd + 4..] + .find('\n') + .expect("load cmd didn't end with newline"); + if matches!( + &block[cmd + 4..cmd_end], + "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" + ) { + expected.push_str(block); + } + } + + let task_dumper = TaskDumper::new( + // SAFETY: syscall + unsafe { mach2::traps::mach_task_self() }, + ); + + let mut actual = String::new(); + let images = task_dumper.read_images().expect("failed to read images"); + + for img in images { + let lcmds = task_dumper + .read_load_commands(&img) + .expect("failed to read load commands"); + + for lc in lcmds.iter() { + match lc { + LoadCommand::Segment(seg) => { + write!( + &mut actual, + " + cmd LC_SEGMENT_64 + cmdsize {} + segname {} + vmaddr 0x{:x} + vmsize 0x{:x} + fileoff {} + filesize {} + maxprot 0x{:x} + initprot 0x{:x} + nsects {} + flags 0x{:x} +", + seg.cmd_size, + std::str::from_utf8(&seg.segment_name).unwrap(), + seg.vm_addr, + seg.vm_size, + seg.file_off, + seg.file_size, + seg.max_prot, + seg.init_prot, + seg.num_sections, + seg.flags, + ) + .unwrap(); + } + LoadCommand::Dylib(_dylib) => { + unreachable!() + } + LoadCommand::Uuid(uuid) => { + let id = uuid::Uuid::from_bytes(uuid.uuid); + let mut uuid_buf = [0u8; uuid::fmt::Hyphenated::LENGTH]; + let uuid_str = id.hyphenated().encode_upper(&mut uuid_buf); + + write!( + &mut actual, + " + cmd LC_UUID + cmdsize {} + uuid {uuid_str} +", + uuid.cmd_size, + ) + .unwrap(); + } + } + } + } + + similar_asserts::assert_str_eq!(expected, actual); +} From 7dbd4c568d42f707e6f6c8e8a4e7c7d59d3c8365 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 21 Apr 2022 08:28:39 +0200 Subject: [PATCH 27/53] Verify test works on macos --- src/mac/mach.rs | 5 ++-- src/mac/task_dumper.rs | 29 ++++++++++++++++++++++ tests/task_dumper.rs | 56 +++++++++++++++++++++++++++++++----------- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/mac/mach.rs b/src/mac/mach.rs index cfaa339e..9212c0ec 100644 --- a/src/mac/mach.rs +++ b/src/mac/mach.rs @@ -290,8 +290,8 @@ pub trait TaskInfo { const FLAVOR: u32; } -// usr/include/mach-o/loader.h, the file type for the main executable image -//const MH_EXECUTE: u32 = 0x2; +/// , the file type for the main executable image +pub const MH_EXECUTE: u32 = 0x2; // usr/include/mach-o/loader.h, magic number for MachHeader pub const MH_MAGIC_64: u32 = 0xfeedfacf; // usr/include/mach-o/loader.h, command to map a segment @@ -370,6 +370,7 @@ pub struct SegmentCommand64 { /// built and copied into user so it can be use to determined if the library used /// at runtime is exactly the same as used to built the program. #[repr(C)] +#[derive(Debug)] pub struct Dylib { /// Offset from the load command start to the pathname pub name: u32, diff --git a/src/mac/task_dumper.rs b/src/mac/task_dumper.rs index a23ea7dd..417aaf4d 100644 --- a/src/mac/task_dumper.rs +++ b/src/mac/task_dumper.rs @@ -321,6 +321,35 @@ impl TaskDumper { ) } + /// Retrieves the main executable image for the task. + /// + /// Note that this method is currently only used for tests due to deficiencies + /// in `otool` + /// + /// # Errors + /// + /// Any of the errors that apply to [`Self::read_images`] apply here, in + /// addition to not being able to find the main executable image + pub fn read_executable_image(&self) -> Result { + let images = self.read_images()?; + + for img in images { + let mach_header = self.read_task_memory::(img.load_address, 1)?; + + let header = &mach_header[0]; + + if header.magic != mach::MH_MAGIC_64 { + return Err(TaskDumpError::InvalidMachHeader); + } + + if header.file_type == mach::MH_EXECUTE { + return Ok(img); + } + } + + Err(TaskDumpError::NoExecutableImage) + } + /// Retrieves the load commands for the specified image /// /// # Errors diff --git a/tests/task_dumper.rs b/tests/task_dumper.rs index 806fed9e..abaca5a8 100644 --- a/tests/task_dumper.rs +++ b/tests/task_dumper.rs @@ -26,6 +26,8 @@ fn iterates_load_commands() { let mut expected = String::new(); let mut lc_index = 0; + expected.push('\n'); + while let Some(nlc) = lc_str[lc_index..].find("Load command ") { lc_index += nlc; @@ -34,17 +36,31 @@ fn iterates_load_commands() { None => &lc_str[lc_index..], }; + // otool prints the load command index for each command, but we only + // handle the small subset of the available load commands we care about + // so just ignore that + let block = &block[block.find('\n').unwrap() + 1..]; + + // otool also prints all the sections for LC_SEGMENT_* commands, but + // we don't care about those, so ignore them + let block = match block.find("Section") { + Some(ind) => &block[..ind], + None => block, + }; + + lc_index += 13; + let cmd = block .find("cmd ") .expect("load commnd didn't specify cmd kind"); - let cmd_end = block[cmd + 4..] + let cmd_end = block[cmd..] .find('\n') .expect("load cmd didn't end with newline"); if matches!( - &block[cmd + 4..cmd_end], + dbg!(&block[cmd + 4..cmd + cmd_end]), "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" ) { - expected.push_str(block); + expected.push_str(dbg!(block)); } } @@ -54,33 +70,45 @@ fn iterates_load_commands() { ); let mut actual = String::new(); - let images = task_dumper.read_images().expect("failed to read images"); - for img in images { + // Unfortunately, Apple decided to move dynamic libs into a shared cache, + // removing them from the file system completely, and unless I'm missing it + // there is no way to get the load commands for the dylibs since otool + // only understands file paths? So we just get the load commands for the main + // executable instead, this means that we miss the `LC_ID_DYLIB` commands + // since they only apply to dylibs, but this test is more that we can + // correctly iterate through the load commands themselves, so this _should_ + // be fine... + let exe_img = task_dumper + .read_executable_image() + .expect("failed to read executable image"); + + { let lcmds = task_dumper - .read_load_commands(&img) + .read_load_commands(&exe_img) .expect("failed to read load commands"); for lc in lcmds.iter() { match lc { LoadCommand::Segment(seg) => { + let segname = std::str::from_utf8(&seg.segment_name).unwrap(); + let segname = &segname[..segname.find('\0').unwrap()]; write!( &mut actual, " cmd LC_SEGMENT_64 cmdsize {} segname {} - vmaddr 0x{:x} - vmsize 0x{:x} + vmaddr 0x{:016x} + vmsize 0x{:016x} fileoff {} filesize {} - maxprot 0x{:x} - initprot 0x{:x} + maxprot 0x{:08x} + initprot 0x{:08x} nsects {} - flags 0x{:x} -", + flags 0x{:x}", seg.cmd_size, - std::str::from_utf8(&seg.segment_name).unwrap(), + segname, seg.vm_addr, seg.vm_size, seg.file_off, @@ -93,7 +121,7 @@ fn iterates_load_commands() { .unwrap(); } LoadCommand::Dylib(_dylib) => { - unreachable!() + unreachable!(); } LoadCommand::Uuid(uuid) => { let id = uuid::Uuid::from_bytes(uuid.uuid); From 5852ffabb69700d75e59abdfa91bd1f898c22bab Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 21 Apr 2022 14:11:22 +0200 Subject: [PATCH 28/53] Remove unneeded crash-context patch --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ec8ded6b..3c9e1d53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,4 +54,3 @@ uuid = "1.0" [patch.crates-io] minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } -crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "macos" } From 13609675653e6384b2cce120c4b06a6bda18db00 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 21 Apr 2022 16:41:00 +0200 Subject: [PATCH 29/53] oops, remove dbg! code --- src/mac/streams/misc_info.rs | 2 +- tests/task_dumper.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mac/streams/misc_info.rs b/src/mac/streams/misc_info.rs index 938bb2ab..629b94ce 100644 --- a/src/mac/streams/misc_info.rs +++ b/src/mac/streams/misc_info.rs @@ -111,7 +111,7 @@ impl MinidumpWriter { // Breakpad was using an old method to retrieve this, let's try the // BSD method instead which is already implemented in libc let mut proc_info = std::mem::MaybeUninit::::uninit(); - let size = dbg!(std::mem::size_of::() as i32); + let size = std::mem::size_of::() as i32; if libc::proc_pidinfo( pid, libc::PROC_PIDTBSDINFO, diff --git a/tests/task_dumper.rs b/tests/task_dumper.rs index abaca5a8..6f04c396 100644 --- a/tests/task_dumper.rs +++ b/tests/task_dumper.rs @@ -57,10 +57,10 @@ fn iterates_load_commands() { .find('\n') .expect("load cmd didn't end with newline"); if matches!( - dbg!(&block[cmd + 4..cmd + cmd_end]), + &block[cmd + 4..cmd + cmd_end], "LC_SEGMENT_64" | "LC_UUID" | "LC_ID_DYLIB" ) { - expected.push_str(dbg!(block)); + expected.push_str(block); } } From 0f361dab420cf76c31e33e2fc5429497d75fcf95 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 25 Apr 2022 16:27:04 +0200 Subject: [PATCH 30/53] Oops, we were dropping a register --- src/mac/streams/thread_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index ff6e41c6..8ae4785e 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -202,7 +202,7 @@ impl MinidumpWriter { out.context_flags = format::ContextFlagsArm64Old::CONTEXT_ARM64_OLD_FULL.bits() as u64; out.cpsr = ts.cpsr; - out.iregs[..28].copy_from_slice(&ts.x[..28]); + out.iregs[..29].copy_from_slice(&ts.x[..29]); out.iregs[29] = ts.fp; out.iregs[30] = ts.lr; out.sp = ts.sp; From 7d4aecf2fd83e6aaca54b924588a528f7649c879 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 25 Apr 2022 16:28:00 +0200 Subject: [PATCH 31/53] Use correct arm64 processor arch I think this may have been causing sentry/minidump-stackwalk to fail to read the CPU context --- src/mac/streams/system_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mac/streams/system_info.rs b/src/mac/streams/system_info.rs index aef90bbd..aac2de57 100644 --- a/src/mac/streams/system_info.rs +++ b/src/mac/streams/system_info.rs @@ -159,7 +159,7 @@ impl MinidumpWriter { let processor_revision = ((model as u16) << 8) | stepping as u16; } else if #[cfg(target_arch = "aarch64")] { - let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64; + let processor_architecture = MDCPUArchitecture::PROCESSOR_ARCHITECTURE_ARM64_OLD; let family: u32 = mach::sysctl_by_name(b"hw.cpufamily\0"); From ff3c98bfd1d201fc892599affb705f59786b1dd8 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 26 Apr 2022 07:52:11 +0200 Subject: [PATCH 32/53] Remove superfluous 'what' comment --- src/mac/streams/module_list.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index a2d0f1dc..991de0d3 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -153,12 +153,7 @@ impl MinidumpWriter { raw_module.version_info.struct_version = format::VS_FFI_STRUCVERSION; // Convert MAC dylib version format, which is a 32 bit number, to the - // format used by minidump. The mac format is <16 bits>.<8 bits>.<8 bits> - // so it fits nicely into the windows version with some massaging - // The mapping is: - // 1) upper 16 bits of MAC version go to lower 16 bits of product HI - // 2) Next most significant 8 bits go to upper 16 bits of product LO - // 3) Least significant 8 bits go to lower 16 bits of product LO + // format used by minidump. raw_module.version_info.file_version_hi = version >> 16; raw_module.version_info.file_version_lo = ((version & 0xff00) << 8) | (version & 0xff); } From 4342fdeee6c3a7deed47412c16d1aad404523375 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 26 Apr 2022 07:57:38 +0200 Subject: [PATCH 33/53] Hopefully fix slide calculation --- src/mac/streams/module_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mac/streams/module_list.rs b/src/mac/streams/module_list.rs index 991de0d3..44213a4f 100644 --- a/src/mac/streams/module_list.rs +++ b/src/mac/streams/module_list.rs @@ -95,7 +95,7 @@ impl MinidumpWriter { mach::LoadCommand::Segment(seg) if sizes.is_none() => { if &seg.segment_name[..7] == b"__TEXT\0" { let slide = if seg.file_off == 0 && seg.file_size != 0 { - (image.load_address - seg.vm_addr) as isize + image.load_address as isize - seg.vm_addr as isize } else { 0 }; From b57038a97323938d8576c3f2f12a5d0c0d8688d8 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 26 Apr 2022 18:15:55 +0200 Subject: [PATCH 34/53] Checkpoint of fail --- Cargo.toml | 4 ++ src/bin/test.rs | 43 +++++++++++++---- tests/mac_minidump_writer.rs | 90 +++++++++++++++++++++++++++++++----- 3 files changed, 117 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c9e1d53..85063712 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,11 @@ features = [ mach2 = "0.4" [dev-dependencies] +# Sigh, minidump-processor is async for some reason so we need an executor :( +futures = { version = "0.3", features = ["executor"] } minidump = "0.10" +minidump-processor = "0.10" +memmap2 = "0.5" [target.'cfg(target_os = "macos")'.dev-dependencies] similar-asserts = "1.2" diff --git a/src/bin/test.rs b/src/bin/test.rs index 802b7303..eba6c60e 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -347,17 +347,42 @@ mod mac { #[inline(never)] pub(super) fn real_main(_args: Vec) -> Result<()> { - unsafe { - let task = mach2::traps::mach_task_self(); - let thread = mach2::mach_init::mach_thread_self(); - println!("{task} {thread}"); + dbg!(unsafe { libc::_dyld_image_count() }); + std::thread::Builder::new() + .name("test-thread".to_owned()) + .spawn(move || { + #[inline(never)] + fn wait_until_killed() { + unsafe { + let task = dbg!(mach2::traps::mach_task_self()); + let pid = dbg!(std::process::id()); + let thread = dbg!(mach2::mach_init::mach_thread_self()); + + let mut real_task = 0; + dbg!(mach2::traps::task_for_pid( + task, + pid as i32, + &mut real_task + )); + dbg!(real_task); + + println!("{task} {thread}"); + + // Wait until we're killed + loop { + std::thread::park(); + } + } + } - // Wait until we're killed - loop { - std::thread::park(); - } - } + wait_until_killed() + }) + .unwrap() + .join() + .unwrap(); + + Ok(()) } } diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index 11a9d634..59591bd4 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -27,14 +27,15 @@ fn get_crash_reason<'a, T: std::ops::Deref + 'a>( ) } -#[test] -fn dump_external_process() { +struct Captured<'md> { + task: u32, + thread: u32, + minidump: Minidump<'md, memmap2::Mmap>, +} + +fn capture_minidump(name: &str) -> Captured<'_> { use std::io::BufRead; - let approximate_proc_start_time = std::time::SystemTime::now() - .duration_since(std::time::SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(); let mut child = start_child_and_return(""); let (task, thread) = { @@ -51,6 +52,9 @@ fn dump_external_process() { (task, thread) }; + let pid = dbg!(std::process::id()); + assert!(task != unsafe { dbg!(mach2::traps::mach_task_self()) }); + let crash_context = crash_context::CrashContext { task, thread, @@ -62,10 +66,7 @@ fn dump_external_process() { }), }; - let mut tmpfile = tempfile::Builder::new() - .prefix("mac_external_process") - .tempfile() - .unwrap(); + let mut tmpfile = tempfile::Builder::new().prefix(name).tempfile().unwrap(); let mut dumper = MinidumpWriter::new(crash_context); @@ -75,7 +76,23 @@ fn dump_external_process() { child.kill().expect("failed to kill child"); - let md = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); + let minidump = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); + + Captured { + task, + thread, + minidump, + } +} + +#[test] +fn dump_external_process() { + let approximate_proc_start_time = std::time::SystemTime::now() + .duration_since(std::time::SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + + let md = capture_minidump("dump_external_process").minidump; let crash_reason = get_crash_reason(&md); @@ -127,3 +144,54 @@ fn dump_external_process() { panic!("unexpected misc info type {:?}", misc_info); } } + +/// Validates we can actually walk the stack for each thread in the minidump, +/// this is using minidump-processor, which (currently) depends on breakpad +/// symbols, however https://github.com/mozilla/dump_syms is not available as +/// a library https://github.com/mozilla/dump_syms/issues/253, so we just require +/// that it already be installed, hence the ignore +#[test] +#[ignore = "ignored, requires dump_syms installed"] +fn stackwalks() { + println!("generating minidump..."); + let md = capture_minidump("stackwalks"); + + // Generate the breakpad symbols + println!("generating symbols..."); + let mut cmd = std::process::Command::new("dump_syms"); + cmd.args(["-o", "mac_stackwalks.sym", "target/debug/test"]); + assert!(cmd.status().unwrap().success()); + + let provider = + minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ + ".".into() + ])); + + let state = futures::executor::block_on(async { + minidump_processor::process_minidump(&md.minidump, &provider).await + }) + .unwrap(); + + //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); + + // We expect 2 threads, one of which is fake crashing thread + let fake_crash_thread = state + .threads + .iter() + .find(|cs| cs.thread_id == md.thread) + .expect("failed to find crash thread"); + + // The thread _should_ have a name + assert_eq!( + fake_crash_thread.thread_name.as_deref(), + Some("test-thread") + ); + + assert!( + fake_crash_thread + .frames + .iter() + .any(|sf| { sf.function_name.as_deref() == Some("wait_until_killed") }), + "unable to locate expected function" + ); +} From f44a51c5a946f43c18616f3d19f7bd0ef54850ff Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 08:23:16 +0200 Subject: [PATCH 35/53] Fix formatting --- src/bin/test.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/bin/test.rs b/src/bin/test.rs index eba6c60e..89e4005e 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -347,7 +347,6 @@ mod mac { #[inline(never)] pub(super) fn real_main(_args: Vec) -> Result<()> { - dbg!(unsafe { libc::_dyld_image_count() }); std::thread::Builder::new() .name("test-thread".to_owned()) @@ -360,11 +359,7 @@ mod mac { let thread = dbg!(mach2::mach_init::mach_thread_self()); let mut real_task = 0; - dbg!(mach2::traps::task_for_pid( - task, - pid as i32, - &mut real_task - )); + dbg!(mach2::traps::task_for_pid(task, pid as i32, &mut real_task)); dbg!(real_task); println!("{task} {thread}"); From 4ec6a9aaf2d490485be2fd66a298cdd91ee46ea4 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 08:23:28 +0200 Subject: [PATCH 36/53] Using as a path dep for now --- src/mac.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mac.rs b/src/mac.rs index 3e7fd59d..4d405999 100644 --- a/src/mac.rs +++ b/src/mac.rs @@ -1,3 +1,5 @@ +#![allow(unsafe_code)] + #[cfg(target_pointer_width = "32")] compile_error!("Various MacOS FFI bindings assume we are on a 64-bit architechture"); From 4af327454a930c7682af55b8bfcca5def91d5ee5 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 11:46:04 +0200 Subject: [PATCH 37/53] Use crash-context to properly send process info --- .gitignore | 1 + Cargo.toml | 5 +- src/bin/test.rs | 54 ++++++++++++++------- tests/common/mod.rs | 4 +- tests/linux_minidump_writer.rs | 7 +-- tests/mac_minidump_writer.rs | 87 ++++++++++++++++------------------ tests/ptrace_dumper.rs | 2 +- 7 files changed, 89 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 96ef6c0b..6beb0457 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +.test-symbols diff --git a/Cargo.toml b/Cargo.toml index 85063712..86fc3873 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,7 @@ mach2 = "0.4" # Sigh, minidump-processor is async for some reason so we need an executor :( futures = { version = "0.3", features = ["executor"] } minidump = "0.10" -minidump-processor = "0.10" +minidump-processor = { version = "0.10", features = ["breakpad-syms"] } memmap2 = "0.5" [target.'cfg(target_os = "macos")'.dev-dependencies] @@ -57,4 +57,5 @@ similar-asserts = "1.2" uuid = "1.0" [patch.crates-io] -minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } +minidump-common = { git = "https://github.com/rust-minidump/rust-minidump", branch = "main" } +crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "prep-release" } diff --git a/src/bin/test.rs b/src/bin/test.rs index 89e4005e..6ef236bf 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -344,34 +344,52 @@ mod windows { #[cfg(target_os = "macos")] mod mac { use super::*; + use std::time::Duration; #[inline(never)] - pub(super) fn real_main(_args: Vec) -> Result<()> { - dbg!(unsafe { libc::_dyld_image_count() }); + pub(super) fn real_main(args: Vec) -> Result<()> { + let port_name = args.get(0).ok_or("mach port name not specified")?; + let exception: u32 = args.get(1).ok_or("exception code not specified")?.parse()?; + + let client = + crash_context::ipc::Client::create(&std::ffi::CString::new(port_name.clone())?)?; + std::thread::Builder::new() .name("test-thread".to_owned()) .spawn(move || { #[inline(never)] - fn wait_until_killed() { - unsafe { - let task = dbg!(mach2::traps::mach_task_self()); - let pid = dbg!(std::process::id()); - let thread = dbg!(mach2::mach_init::mach_thread_self()); - - let mut real_task = 0; - dbg!(mach2::traps::task_for_pid(task, pid as i32, &mut real_task)); - dbg!(real_task); - - println!("{task} {thread}"); - - // Wait until we're killed - loop { - std::thread::park(); + fn wait_until_killed(client: crash_context::ipc::Client, exception: u32) { + // SAFETY: syscalls + let cc = unsafe { + crash_context::CrashContext { + task: mach2::traps::mach_task_self(), + thread: mach2::mach_init::mach_thread_self(), + handler_thread: mach2::port::MACH_PORT_NULL, + exception: Some(crash_context::ExceptionInfo { + kind: exception as i32, + code: 0, + subcode: None, + }), } + }; + + // Send the crash context to the server and wait for it to + // finish dumping, we should be killed shortly afterwards + client + .send_crash_context( + &cc, + Some(Duration::from_secs(2)), + Some(Duration::from_secs(5)), + ) + .expect("failed to send crash context/receive ack"); + + // Wait until we're killed + loop { + std::thread::park(); } } - wait_until_killed() + wait_until_killed(client, exception) }) .unwrap() .join() diff --git a/tests/common/mod.rs b/tests/common/mod.rs index bb262601..2c1ded5f 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -78,7 +78,7 @@ pub fn wait_for_threads(child: &mut Child, num: usize) { } #[allow(unused)] -pub fn start_child_and_return(command: &str) -> Child { +pub fn start_child_and_return(args: &[&str]) -> Child { let mut child = Command::new("cargo") .env("RUST_BACKTRACE", "1") .arg("run") @@ -86,7 +86,7 @@ pub fn start_child_and_return(command: &str) -> Child { .arg("--bin") .arg("test") .arg("--") - .arg(command) + .args(args) .stdout(Stdio::piped()) .spawn() .expect("failed to execute child"); diff --git a/tests/linux_minidump_writer.rs b/tests/linux_minidump_writer.rs index 3901f08e..6c8c0045 100644 --- a/tests/linux_minidump_writer.rs +++ b/tests/linux_minidump_writer.rs @@ -48,6 +48,7 @@ fn get_crash_context(tid: Pid) -> CrashContext { CrashContext { inner: crash_context::CrashContext { siginfo, + pid: std::process::id() as _, tid, context, float_state, @@ -98,7 +99,7 @@ fn test_write_dump_with_context() { } fn test_write_and_read_dump_from_parent_helper(context: Context) { - let mut child = start_child_and_return("spawn_mmap_wait"); + let mut child = start_child_and_return(&["spawn_mmap_wait"]); let pid = child.id() as i32; let mut tmpfile = tempfile::Builder::new() @@ -214,7 +215,7 @@ fn test_write_and_read_dump_from_parent_with_context() { } fn test_write_with_additional_memory_helper(context: Context) { - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = child.id() as i32; let mut tmpfile = tempfile::Builder::new() @@ -623,7 +624,7 @@ fn test_sanitized_stacks_with_context() { } fn test_write_early_abort_helper(context: Context) { - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = child.id() as i32; let mut tmpfile = tempfile::Builder::new() diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index 59591bd4..93145f82 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -28,52 +28,41 @@ fn get_crash_reason<'a, T: std::ops::Deref + 'a>( } struct Captured<'md> { + #[allow(dead_code)] task: u32, thread: u32, minidump: Minidump<'md, memmap2::Mmap>, } -fn capture_minidump(name: &str) -> Captured<'_> { - use std::io::BufRead; +fn capture_minidump(name: &str, exception_kind: u32) -> Captured<'_> { + // Create a mach port server to retrieve the crash details from the child + let mut server = crash_context::ipc::Server::create(&std::ffi::CString::new(name).unwrap()) + .expect("failed to create mach port service"); - let mut child = start_child_and_return(""); + let mut child = start_child_and_return(&[name, &exception_kind.to_string()]); - let (task, thread) = { - let mut f = std::io::BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); - let mut buf = String::new(); - f.read_line(&mut buf).expect("failed to read stdout"); - assert!(!buf.is_empty()); - - let mut biter = buf.trim().split(' '); - - let task: u32 = biter.next().unwrap().parse().unwrap(); - let thread: u32 = biter.next().unwrap().parse().unwrap(); - - (task, thread) - }; - - let pid = dbg!(std::process::id()); - assert!(task != unsafe { dbg!(mach2::traps::mach_task_self()) }); - - let crash_context = crash_context::CrashContext { - task, - thread, - handler_thread: mach2::port::MACH_PORT_NULL, - exception: Some(crash_context::ExceptionInfo { - kind: mach2::exception_types::EXC_BREAKPOINT as i32, - code: 100, - subcode: None, - }), - }; + // Wait for the child to spinup and report a crash context to us + let mut rcc = server + .try_recv_crash_context(Some(std::time::Duration::from_secs(5))) + .expect("failed to receive context") + .expect("receive timed out"); let mut tmpfile = tempfile::Builder::new().prefix(name).tempfile().unwrap(); - let mut dumper = MinidumpWriter::new(crash_context); + let task = rcc.crash_context.task; + let thread = rcc.crash_context.thread; + + let mut dumper = MinidumpWriter::new(rcc.crash_context); dumper .dump(tmpfile.as_file_mut()) .expect("failed to write minidump"); + // Signal the child that we've received and processed the crash context + rcc.acker + .send_ack(1, Some(std::time::Duration::from_secs(2))) + .expect("failed to send ack"); + child.kill().expect("failed to kill child"); let minidump = Minidump::read_path(tmpfile.path()).expect("failed to read minidump"); @@ -92,7 +81,11 @@ fn dump_external_process() { .unwrap() .as_secs(); - let md = capture_minidump("dump_external_process").minidump; + let md = capture_minidump( + "dump_external_process", + mach2::exception_types::EXC_BREAKPOINT, + ) + .minidump; let crash_reason = get_crash_reason(&md); @@ -154,17 +147,17 @@ fn dump_external_process() { #[ignore = "ignored, requires dump_syms installed"] fn stackwalks() { println!("generating minidump..."); - let md = capture_minidump("stackwalks"); + let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); // Generate the breakpad symbols println!("generating symbols..."); let mut cmd = std::process::Command::new("dump_syms"); - cmd.args(["-o", "mac_stackwalks.sym", "target/debug/test"]); + cmd.args(["-s", ".test-symbols", "target/debug/test"]); assert!(cmd.status().unwrap().success()); let provider = minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ - ".".into() + ".test-symbols".into(), ])); let state = futures::executor::block_on(async { @@ -174,24 +167,28 @@ fn stackwalks() { //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); - // We expect 2 threads, one of which is fake crashing thread + // We expect at least 2 threads, one of which is the fake crashing thread let fake_crash_thread = state .threads .iter() .find(|cs| cs.thread_id == md.thread) .expect("failed to find crash thread"); - // The thread _should_ have a name - assert_eq!( - fake_crash_thread.thread_name.as_deref(), - Some("test-thread") - ); + // The thread is named, however we currently don't retrieve that information + // currently, indeed, it appears that you need to retrieve the pthread that + // corresponds the mach port for a thread, however that API seems to be + // task specific... + // assert_eq!( + // fake_crash_thread.thread_name.as_deref(), + // Some("test-thread") + // ); assert!( - fake_crash_thread - .frames - .iter() - .any(|sf| { sf.function_name.as_deref() == Some("wait_until_killed") }), + fake_crash_thread.frames.iter().any(|sf| { + sf.function_name + .as_ref() + .map_or(false, |fname| fname.ends_with("wait_until_killed")) + }), "unable to locate expected function" ); } diff --git a/tests/ptrace_dumper.rs b/tests/ptrace_dumper.rs index 520a3eaa..35d89461 100644 --- a/tests/ptrace_dumper.rs +++ b/tests/ptrace_dumper.rs @@ -186,7 +186,7 @@ fn test_copy_from_process_self() { #[test] fn test_sanitize_stack_copy() { let num_of_threads = 1; - let mut child = start_child_and_return("spawn_alloc_wait"); + let mut child = start_child_and_return(&["spawn_alloc_wait"]); let pid = child.id() as i32; let mut f = BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); From 73c53c8fe6458e992614b9d98b11481b0dd0c16f Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 21:44:51 +0200 Subject: [PATCH 38/53] Remove minidump-processor temporarily due to openssl --- Cargo.toml | 2 +- tests/mac_minidump_writer.rs | 108 +++++++++++++++++------------------ 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 86fc3873..c840fab5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,7 @@ mach2 = "0.4" # Sigh, minidump-processor is async for some reason so we need an executor :( futures = { version = "0.3", features = ["executor"] } minidump = "0.10" -minidump-processor = { version = "0.10", features = ["breakpad-syms"] } +#minidump-processor = { version = "0.10", features = ["breakpad-syms"] } memmap2 = "0.5" [target.'cfg(target_os = "macos")'.dev-dependencies] diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index 93145f82..b1f1fc07 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -138,57 +138,57 @@ fn dump_external_process() { } } -/// Validates we can actually walk the stack for each thread in the minidump, -/// this is using minidump-processor, which (currently) depends on breakpad -/// symbols, however https://github.com/mozilla/dump_syms is not available as -/// a library https://github.com/mozilla/dump_syms/issues/253, so we just require -/// that it already be installed, hence the ignore -#[test] -#[ignore = "ignored, requires dump_syms installed"] -fn stackwalks() { - println!("generating minidump..."); - let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); - - // Generate the breakpad symbols - println!("generating symbols..."); - let mut cmd = std::process::Command::new("dump_syms"); - cmd.args(["-s", ".test-symbols", "target/debug/test"]); - assert!(cmd.status().unwrap().success()); - - let provider = - minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ - ".test-symbols".into(), - ])); - - let state = futures::executor::block_on(async { - minidump_processor::process_minidump(&md.minidump, &provider).await - }) - .unwrap(); - - //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); - - // We expect at least 2 threads, one of which is the fake crashing thread - let fake_crash_thread = state - .threads - .iter() - .find(|cs| cs.thread_id == md.thread) - .expect("failed to find crash thread"); - - // The thread is named, however we currently don't retrieve that information - // currently, indeed, it appears that you need to retrieve the pthread that - // corresponds the mach port for a thread, however that API seems to be - // task specific... - // assert_eq!( - // fake_crash_thread.thread_name.as_deref(), - // Some("test-thread") - // ); - - assert!( - fake_crash_thread.frames.iter().any(|sf| { - sf.function_name - .as_ref() - .map_or(false, |fname| fname.ends_with("wait_until_killed")) - }), - "unable to locate expected function" - ); -} +// /// Validates we can actually walk the stack for each thread in the minidump, +// /// this is using minidump-processor, which (currently) depends on breakpad +// /// symbols, however https://github.com/mozilla/dump_syms is not available as +// /// a library https://github.com/mozilla/dump_syms/issues/253, so we just require +// /// that it already be installed, hence the ignore +// #[test] +// #[ignore = "ignored, requires dump_syms installed"] +// fn stackwalks() { +// println!("generating minidump..."); +// let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); + +// // Generate the breakpad symbols +// println!("generating symbols..."); +// let mut cmd = std::process::Command::new("dump_syms"); +// cmd.args(["-s", ".test-symbols", "target/debug/test"]); +// assert!(cmd.status().unwrap().success()); + +// let provider = +// minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ +// ".test-symbols".into(), +// ])); + +// let state = futures::executor::block_on(async { +// minidump_processor::process_minidump(&md.minidump, &provider).await +// }) +// .unwrap(); + +// //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); + +// // We expect at least 2 threads, one of which is the fake crashing thread +// let fake_crash_thread = state +// .threads +// .iter() +// .find(|cs| cs.thread_id == md.thread) +// .expect("failed to find crash thread"); + +// // The thread is named, however we currently don't retrieve that information +// // currently, indeed, it appears that you need to retrieve the pthread that +// // corresponds the mach port for a thread, however that API seems to be +// // task specific... +// // assert_eq!( +// // fake_crash_thread.thread_name.as_deref(), +// // Some("test-thread") +// // ); + +// assert!( +// fake_crash_thread.frames.iter().any(|sf| { +// sf.function_name +// .as_ref() +// .map_or(false, |fname| fname.ends_with("wait_until_killed")) +// }), +// "unable to locate expected function" +// ); +// } From ccada3b62b36d5b1c8498768543e419aa9c75c84 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 21:55:34 +0200 Subject: [PATCH 39/53] Fix windows --- tests/windows_minidump_writer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index 7415e370..b34a44c1 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -51,6 +51,7 @@ fn dump_current_process() { let crash_context = crash_context::CrashContext { exception_pointers: (&exception_ptrs as *const EXCEPTION_POINTERS).cast(), + process_id: std::process::id(), thread_id: GetCurrentThreadId(), exception_code: STATUS_INVALID_PARAMETER, }; From fe4e0d9c36783f4668919efe81803e9b1c64df75 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 21:56:10 +0200 Subject: [PATCH 40/53] Fix it better --- tests/windows_minidump_writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index b34a44c1..ed6fb07e 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -86,7 +86,7 @@ fn dump_current_process() { fn dump_external_process() { use std::io::BufRead; - let mut child = start_child_and_return(&format!("{:x}", EXCEPTION_ILLEGAL_INSTRUCTION)); + let mut child = start_child_and_return(&[&format!("{:x}", EXCEPTION_ILLEGAL_INSTRUCTION)]); let (process_id, exception_pointers, thread_id, exception_code) = { let mut f = std::io::BufReader::new(child.stdout.as_mut().expect("Can't open stdout")); From b67946e4b824d5c114e05f02e6f7b554858ab97e Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 28 Apr 2022 21:57:05 +0200 Subject: [PATCH 41/53] oops --- tests/windows_minidump_writer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index ed6fb07e..5a431741 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -108,6 +108,7 @@ fn dump_external_process() { let crash_context = crash_context::CrashContext { exception_pointers: exception_pointers as _, + process_id, thread_id, exception_code, }; From a8e0783a9523d1184b24981432470352691ef8f3 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 07:33:59 +0200 Subject: [PATCH 42/53] Get rid of needless functions now that pid is part of context --- src/windows/minidump_writer.rs | 77 ++++++++++++++------------------ tests/windows_minidump_writer.rs | 5 +-- 2 files changed, 36 insertions(+), 46 deletions(-) diff --git a/src/windows/minidump_writer.rs b/src/windows/minidump_writer.rs index ce6bc2f8..f6d4d9d9 100644 --- a/src/windows/minidump_writer.rs +++ b/src/windows/minidump_writer.rs @@ -13,8 +13,6 @@ pub struct MinidumpWriter { crash_context: crash_context::CrashContext, /// Handle to the crashing process, which could be ourselves crashing_process: HANDLE, - /// The pid of the crashing process. - crashing_pid: u32, /// The `EXCEPTION_POINTERS` contained in crash context is a pointer into the /// memory of the process that crashed, as it contains an `EXCEPTION_RECORD` /// record which is an internally linked list, so in the case that we are @@ -26,53 +24,41 @@ pub struct MinidumpWriter { } impl MinidumpWriter { - /// Creates a minidump writer for a crash that occurred in an external process. + /// Creates a minidump writer capable of dumping the process specified by + /// the [`crash_context::CrashContext`]. /// - /// # Errors + /// Note that it is inherently unreliable to dump the currently running + /// processes, it is recommended to dump from an external process if possible. /// - /// Fails if we are unable to open the external process for some reason - pub fn external_process( - crash_context: crash_context::CrashContext, - pid: u32, - ) -> Result { - // SAFETY: syscall - let crashing_process = unsafe { - threading::OpenProcess( - threading::PROCESS_ALL_ACCESS, // desired access - 0, // inherit handles - pid, // pid - ) - }; - - if crashing_process == 0 { - Err(std::io::Error::last_os_error().into()) - } else { - Ok(Self { - crash_context, - crashing_process, - crashing_pid: pid, - is_external_process: true, - }) - } - } - - /// Creates a minidump writer for a crash that occurred in the current process. + /// # Errors /// - /// Note that in-process dumping is inherently unreliable, it is recommended - /// to use the [`Self::external_process`] in a different process than the - /// one that crashed when possible. - pub fn current_process(crash_context: crash_context::CrashContext) -> Self { - let crashing_pid = std::process::id(); + /// Fails if the process specified in the context is not the local process + /// and we are unable to open it due to eg. security reasons. + pub fn new(crash_context: crash_context::CrashContext) -> Result { + // SAFETY: syscalls + let (crashing_process, is_external_process) = unsafe { + if crash_context.process_id != std::process::id() { + let proc = threading::OpenProcess( + threading::PROCESS_ALL_ACCESS, // desired access + 0, // inherit handles + pid, // pid + ); + + if proc == 0 { + return Err(std::io::Error::last_os_error().into()); + } - // SAFETY: syscall - let crashing_process = unsafe { threading::GetCurrentProcess() }; + (proc, true) + } else { + (threading::GetCurrentProcess(), false) + } + }; - Self { + Ok(Self { crash_context, crashing_process, - crashing_pid, - is_external_process: false, - } + is_external_process: true, + }) } /// Writes a minidump to the specified file @@ -132,7 +118,7 @@ impl MinidumpWriter { let ret = unsafe { md::MiniDumpWriteDump( self.crashing_process, // HANDLE to the process with the crash we want to capture - self.crashing_pid, // process id + self.crash_context.process_id, // process id destination.as_raw_handle() as HANDLE, // file to write the minidump to md::MiniDumpNormal, // MINIDUMP_TYPE - we _might_ want to make this configurable exc_info @@ -278,6 +264,11 @@ impl MinidumpWriter { impl Drop for MinidumpWriter { fn drop(&mut self) { + // Note we close the handle regardless of whether it is the local handle + // or an external one, as noted in the docs + // + // > The pseudo handle need not be closed when it is no longer needed. + // > Calling the CloseHandle function with a pseudo handle has no effect. // SAFETY: syscall unsafe { CloseHandle(self.crashing_process) }; } diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index 5a431741..f385b443 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -56,7 +56,7 @@ fn dump_current_process() { exception_code: STATUS_INVALID_PARAMETER, }; - let dumper = MinidumpWriter::current_process(crash_context); + let dumper = MinidumpWriter::new(crash_context); dumper .dump(tmpfile.as_file_mut()) @@ -118,8 +118,7 @@ fn dump_external_process() { .tempfile() .unwrap(); - let dumper = MinidumpWriter::external_process(crash_context, process_id) - .expect("failed to create MinidumpWriter"); + let dumper = MinidumpWriter::new(crash_context).expect("failed to create MinidumpWriter"); dumper .dump(tmpfile.as_file_mut()) From b18c57214ea5fde365a784ac624b55a9264c5b26 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 07:40:40 +0200 Subject: [PATCH 43/53] Shame --- src/windows/minidump_writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/windows/minidump_writer.rs b/src/windows/minidump_writer.rs index f6d4d9d9..6a1bd2f1 100644 --- a/src/windows/minidump_writer.rs +++ b/src/windows/minidump_writer.rs @@ -41,7 +41,7 @@ impl MinidumpWriter { let proc = threading::OpenProcess( threading::PROCESS_ALL_ACCESS, // desired access 0, // inherit handles - pid, // pid + crash_context.process_id, // pid ); if proc == 0 { From a56338864485ce2037d0132d542d22c6d8d15ab1 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 07:47:17 +0200 Subject: [PATCH 44/53] Fix windows test compilation --- tests/windows_minidump_writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/windows_minidump_writer.rs b/tests/windows_minidump_writer.rs index f385b443..038f8599 100644 --- a/tests/windows_minidump_writer.rs +++ b/tests/windows_minidump_writer.rs @@ -56,7 +56,7 @@ fn dump_current_process() { exception_code: STATUS_INVALID_PARAMETER, }; - let dumper = MinidumpWriter::new(crash_context); + let dumper = MinidumpWriter::new(crash_context).expect("failed to create MinidumpWriter"); dumper .dump(tmpfile.as_file_mut()) From 8fa66d80bd2919148cf2b856d8fe55ba64a6ced0 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 10:52:31 +0200 Subject: [PATCH 45/53] Gracefully handle invalid addresses for thread stacks I encountered an issue, only on x86_64, but could affect aarch64 as well, where a stack overflow could fail to be dumped due the stack address being borked causing the vm_read to fail with `InvalidAddress`. This changes it so that in that case a sentinel value is used in place of the actual stack, as there was already a graceful handling of the case where the stack was reported to be of 0 size that was ported from Breakpad --- src/mac/streams/thread_list.rs | 52 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/mac/streams/thread_list.rs b/src/mac/streams/thread_list.rs index 8ae4785e..fdc40cc3 100644 --- a/src/mac/streams/thread_list.rs +++ b/src/mac/streams/thread_list.rs @@ -81,30 +81,46 @@ impl MinidumpWriter { let stack_size = self.calculate_stack_size(start, dumper); - let stack_location = if stack_size == 0 { - // In some situations the stack address for the thread can come back 0. - // In these cases we skip over the threads in question and stuff the - // stack with a clearly borked value. - thread.stack.start_of_memory_range = 0xdeadbeef; + // In some situations the stack address for the thread can come back 0. + // In these cases we skip over the threads in question and stuff the + // stack with a clearly borked value. + // + // In other cases, notably a stack overflow, we might fail to read the + // stack eg. InvalidAddress in which case we use a different borked + // value to indicate the different failure + let stack_location = if stack_size != 0 { + dumper + .read_task_memory(start, stack_size) + .map(|stack_buffer| { + let stack_location = MDLocationDescriptor { + data_size: stack_buffer.len() as u32, + rva: buffer.position() as u32, + }; + buffer.write_all(&stack_buffer); + stack_location + }) + .ok() + } else { + None + }; - let stack_location = MDLocationDescriptor { - data_size: 16, - rva: buffer.position() as u32, + thread.stack.memory = stack_location.unwrap_or_else(|| { + let borked = if stack_size == 0 { + 0xdeadbeef + } else { + 0xdeaddead }; - buffer.write_all(&0xdeadbeefu64.to_ne_bytes()); - buffer.write_all(&0xdeadbeefu64.to_ne_bytes()); - stack_location - } else { - let stack_buffer = dumper.read_task_memory(start, stack_size)?; + + thread.stack.start_of_memory_range = borked; + let stack_location = MDLocationDescriptor { - data_size: stack_buffer.len() as u32, + data_size: 16, rva: buffer.position() as u32, }; - buffer.write_all(&stack_buffer); + buffer.write_all(&borked.to_ne_bytes()); + buffer.write_all(&borked.to_ne_bytes()); stack_location - }; - - thread.stack.memory = stack_location; + }); // Add the stack memory as a raw block of memory, this is written to // the minidump as part of the memory list stream From 9189c8dbb857334ede43214f61cbde14d63db934 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 15:31:17 +0200 Subject: [PATCH 46/53] Remove crash-context patch --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c840fab5..67c36d25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT" [dependencies] byteorder = "1.3.2" cfg-if = "1.0" -crash-context = "0.1" +crash-context = "0.2" memoffset = "0.6" minidump-common = "0.10" scroll = "0.11" @@ -58,4 +58,3 @@ uuid = "1.0" [patch.crates-io] minidump-common = { git = "https://github.com/rust-minidump/rust-minidump", branch = "main" } -crash-context = { git = "https://github.com/EmbarkStudios/crash-handling", branch = "prep-release" } From 46e3999df0df1fbc994121d8f14ace4f890d8b6a Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 29 Apr 2022 17:31:28 +0200 Subject: [PATCH 47/53] Add CHANGELOG.md --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..084c32e0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ + + +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] +### Added +- Initial implementation of support for `x86_64-apple-darwin` and `aarch64-apple-darwin` + +## [0.1.0] - 2022-04-26 +### Added +- Initial release, including basic support for `x86_64-unknown-linux-gnu/musl` and `x86_64-pc-windows-msvc` + +[Unreleased]: https://github.com/rust-minidump/minidump-writer/compare/0.1.0...HEAD +[0.1.0]: https://github.com/rust-minidump/minidump-writer/releases/tag/0.1.0 From 2082ff70935f8d6faa36a8476aad8a0c20a507d0 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 2 May 2022 13:44:03 +0200 Subject: [PATCH 48/53] Reenable test with patches --- Cargo.toml | 12 +++- tests/mac_minidump_writer.rs | 111 ++++++++++++++++++----------------- 2 files changed, 68 insertions(+), 55 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 67c36d25..47b610a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,12 +49,22 @@ mach2 = "0.4" # Sigh, minidump-processor is async for some reason so we need an executor :( futures = { version = "0.3", features = ["executor"] } minidump = "0.10" -#minidump-processor = { version = "0.10", features = ["breakpad-syms"] } memmap2 = "0.5" [target.'cfg(target_os = "macos")'.dev-dependencies] +# We dump symbols for the `test` executable so that we can validate that minidumps +# created by this crate can be processed by minidump-processor +dump_syms = { version = "0.0.7", default-features = false } +minidump-processor = { version = "0.10", default-features = false, features = [ + "breakpad-syms", +] } similar-asserts = "1.2" uuid = "1.0" [patch.crates-io] +# PR https://github.com/rust-minidump/rust-minidump/pull/534, not released yet minidump-common = { git = "https://github.com/rust-minidump/rust-minidump", branch = "main" } +# PR https://github.com/rust-minidump/rust-minidump/pull/548 +minidump-processor = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } +# PR https://github.com/mozilla/dump_syms/pull/356 +dump_syms = { git = "https://github.com/EmbarkStudios/dump_syms", branch = "master" } diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index b1f1fc07..7b8c5a2d 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -138,57 +138,60 @@ fn dump_external_process() { } } -// /// Validates we can actually walk the stack for each thread in the minidump, -// /// this is using minidump-processor, which (currently) depends on breakpad -// /// symbols, however https://github.com/mozilla/dump_syms is not available as -// /// a library https://github.com/mozilla/dump_syms/issues/253, so we just require -// /// that it already be installed, hence the ignore -// #[test] -// #[ignore = "ignored, requires dump_syms installed"] -// fn stackwalks() { -// println!("generating minidump..."); -// let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); - -// // Generate the breakpad symbols -// println!("generating symbols..."); -// let mut cmd = std::process::Command::new("dump_syms"); -// cmd.args(["-s", ".test-symbols", "target/debug/test"]); -// assert!(cmd.status().unwrap().success()); - -// let provider = -// minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ -// ".test-symbols".into(), -// ])); - -// let state = futures::executor::block_on(async { -// minidump_processor::process_minidump(&md.minidump, &provider).await -// }) -// .unwrap(); - -// //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); - -// // We expect at least 2 threads, one of which is the fake crashing thread -// let fake_crash_thread = state -// .threads -// .iter() -// .find(|cs| cs.thread_id == md.thread) -// .expect("failed to find crash thread"); - -// // The thread is named, however we currently don't retrieve that information -// // currently, indeed, it appears that you need to retrieve the pthread that -// // corresponds the mach port for a thread, however that API seems to be -// // task specific... -// // assert_eq!( -// // fake_crash_thread.thread_name.as_deref(), -// // Some("test-thread") -// // ); - -// assert!( -// fake_crash_thread.frames.iter().any(|sf| { -// sf.function_name -// .as_ref() -// .map_or(false, |fname| fname.ends_with("wait_until_killed")) -// }), -// "unable to locate expected function" -// ); -// } +/// Validates we can actually walk the stack for each thread in the minidump, +/// this is using minidump-processor, which (currently) depends on breakpad +/// symbols, however https://github.com/mozilla/dump_syms is not available as +/// a library https://github.com/mozilla/dump_syms/issues/253, so we just require +/// that it already be installed, hence the ignore +#[test] +fn stackwalks() { + println!("generating minidump..."); + let md = capture_minidump("stackwalks", mach2::exception_types::EXC_BREAKPOINT); + + // Generate the breakpad symbols + println!("generating symbols..."); + dump_syms::dumper::single_file( + dump_syms::dumper::Config { + output: dump_syms::dumper::Output::Store(".test-symbols".into()), + }, + "target/debug/test", + ) + .expect("failed to dump symbols"); + + let provider = + minidump_processor::Symbolizer::new(minidump_processor::simple_symbol_supplier(vec![ + ".test-symbols".into(), + ])); + + let state = futures::executor::block_on(async { + minidump_processor::process_minidump(&md.minidump, &provider).await + }) + .unwrap(); + + //state.print(&mut std::io::stdout()).map_err(|_| ()).unwrap(); + + // We expect at least 2 threads, one of which is the fake crashing thread + let fake_crash_thread = state + .threads + .iter() + .find(|cs| cs.thread_id == md.thread) + .expect("failed to find crash thread"); + + // The thread is named, however we currently don't retrieve that information + // currently, indeed, it appears that you need to retrieve the pthread that + // corresponds the mach port for a thread, however that API seems to be + // task specific... + // assert_eq!( + // fake_crash_thread.thread_name.as_deref(), + // Some("test-thread") + // ); + + assert!( + fake_crash_thread.frames.iter().any(|sf| { + sf.function_name + .as_ref() + .map_or(false, |fname| fname.ends_with("wait_until_killed")) + }), + "unable to locate expected function" + ); +} From 41c1768ff6e61e321398ce8a0a451c719997315c Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 2 May 2022 13:53:50 +0200 Subject: [PATCH 49/53] Fix build --- tests/mac_minidump_writer.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index 7b8c5a2d..4848a9d1 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -153,6 +153,17 @@ fn stackwalks() { dump_syms::dumper::single_file( dump_syms::dumper::Config { output: dump_syms::dumper::Output::Store(".test-symbols".into()), + symbol_server: None, + debug_id: None, + code_id: None, + arch: "", + file_type: dump_syms::common::FileType::Macho, + num_jobs: 2, // default this + check_cfi: false, + mapping_var: None, + mapping_src: None, + mapping_dest: None, + mapping_file: None, }, "target/debug/test", ) From 0b62a959849a018fb47c6b0314f1f80ab82ebb24 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 2 May 2022 13:55:58 +0200 Subject: [PATCH 50/53] Fix it for real this time --- Cargo.toml | 3 ++- tests/mac_minidump_writer.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 47b610a1..55beaaf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,8 +63,9 @@ uuid = "1.0" [patch.crates-io] # PR https://github.com/rust-minidump/rust-minidump/pull/534, not released yet -minidump-common = { git = "https://github.com/rust-minidump/rust-minidump", branch = "main" } +minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } # PR https://github.com/rust-minidump/rust-minidump/pull/548 minidump-processor = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } +minidump = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } # need to patch minidump as well so types are the same # PR https://github.com/mozilla/dump_syms/pull/356 dump_syms = { git = "https://github.com/EmbarkStudios/dump_syms", branch = "master" } diff --git a/tests/mac_minidump_writer.rs b/tests/mac_minidump_writer.rs index 4848a9d1..3b6f6c8a 100644 --- a/tests/mac_minidump_writer.rs +++ b/tests/mac_minidump_writer.rs @@ -151,7 +151,7 @@ fn stackwalks() { // Generate the breakpad symbols println!("generating symbols..."); dump_syms::dumper::single_file( - dump_syms::dumper::Config { + &dump_syms::dumper::Config { output: dump_syms::dumper::Output::Store(".test-symbols".into()), symbol_server: None, debug_id: None, From 699f248dc2be4ddad10d1c8a5a79634fb671f7fc Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 2 May 2022 14:17:04 +0200 Subject: [PATCH 51/53] Replace cargo-audit with cargo-deny There are a couple of advisories that are "OK" for now (though I will PR them to fix them), but the cargo audit GHA doesn't have a way to ignore advisories, cargo-deny does --- .github/workflows/audit.yml | 13 +++++++------ deny.toml | 9 +++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 deny.toml diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index a79266c9..aaadff55 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -3,12 +3,12 @@ name: Security audit on: schedule: # Runs at 00:00 UTC everyday - - cron: '0 0 * * *' + - cron: "0 0 * * *" push: paths: - - '**/Cargo.toml' - - '**/Cargo.lock' - - '**/audit.toml' + - "**/Cargo.toml" + - "**/Cargo.lock" + - "**/audit.toml" jobs: audit: @@ -16,6 +16,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v2 - - uses: actions-rs/audit-check@v1 + - name: deny audit + uses: EmbarkStudios/cargo-deny-action@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} + command: check advisories diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000..b8e12925 --- /dev/null +++ b/deny.toml @@ -0,0 +1,9 @@ +[advisories] +ignore = [ + # chrono can segfault due to use of localtime_r, however this is only used + # via the `cab` crate, which is not using local time + "RUSTSEC-2020-0159", + # This is an old version of time that can segfault due to local time, but + # again, this functionality is not being used + "RUSTSEC-2020-0071", +] From 101809301be8587be257ab2829d16ee42da57244 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Thu, 19 May 2022 21:43:18 +0200 Subject: [PATCH 52/53] Remove minidump* patches now that 0.11 was released --- Cargo.toml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 55beaaf6..feaea30b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ byteorder = "1.3.2" cfg-if = "1.0" crash-context = "0.2" memoffset = "0.6" -minidump-common = "0.10" +minidump-common = "0.11" scroll = "0.11" tempfile = "3.1.0" thiserror = "1.0.21" @@ -48,24 +48,19 @@ mach2 = "0.4" [dev-dependencies] # Sigh, minidump-processor is async for some reason so we need an executor :( futures = { version = "0.3", features = ["executor"] } -minidump = "0.10" +minidump = "0.11" memmap2 = "0.5" [target.'cfg(target_os = "macos")'.dev-dependencies] # We dump symbols for the `test` executable so that we can validate that minidumps # created by this crate can be processed by minidump-processor dump_syms = { version = "0.0.7", default-features = false } -minidump-processor = { version = "0.10", default-features = false, features = [ +minidump-processor = { version = "0.11", default-features = false, features = [ "breakpad-syms", ] } similar-asserts = "1.2" uuid = "1.0" [patch.crates-io] -# PR https://github.com/rust-minidump/rust-minidump/pull/534, not released yet -minidump-common = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } -# PR https://github.com/rust-minidump/rust-minidump/pull/548 -minidump-processor = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } -minidump = { git = "https://github.com/EmbarkStudios/rust-minidump", branch = "main" } # need to patch minidump as well so types are the same -# PR https://github.com/mozilla/dump_syms/pull/356 -dump_syms = { git = "https://github.com/EmbarkStudios/dump_syms", branch = "master" } +# PR https://github.com/mozilla/dump_syms/pull/356, merged, but unreleased +dump_syms = { git = "https://github.com/mozilla/dump_syms", rev = "c2743d5" } # branch = master From 4e4456618329233036a6282723e2e34e4dd8ec76 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Fri, 20 May 2022 12:52:56 +0200 Subject: [PATCH 53/53] Ensure aarch64 compiles as well --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 805c2003..95f7fb80 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,7 @@ jobs: - { os: ubuntu-latest, target: arm-unknown-linux-gnueabihf, use-cross: true } - { os: windows-2022, target: x86_64-pc-windows-msvc, use-cross: false } - { os: macos-latest, target: x86_64-apple-darwin, use-cross: false } + - { os: macos-latest, target: aarch64-apple-darwin, use-cross: false } steps: - name: Checkout repository uses: actions/checkout@v2