diff --git a/Cargo.lock b/Cargo.lock index 74ca8164..59763674 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1065,6 +1065,7 @@ dependencies = [ "log", "lru", "memmap", + "num-traits", "proc-maps", "py-spy-testdata", "rand", diff --git a/Cargo.toml b/Cargo.toml index 59482f68..59cbcc4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ lazy_static = "1.4.0" libc = "0.2" log = "0.4" lru = "0.10" +num-traits = "0.2" regex = ">=1.6.0" tempfile = "3.6.0" proc-maps = "0.3.2" diff --git a/src/binary_parser.rs b/src/binary_parser.rs index ace9ab45..ca40a9a4 100644 --- a/src/binary_parser.rs +++ b/src/binary_parser.rs @@ -6,6 +6,8 @@ use anyhow::Error; use goblin::Object; use memmap::Mmap; +use crate::utils::is_subrange; + pub struct BinaryInfo { pub symbols: HashMap<String, u64>, pub bss_addr: u64, @@ -52,6 +54,12 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, filename.display() ) })??; + if !is_subrange(0, buffer.len(), arch.offset as usize, arch.size as usize) { + return Err(format_err!( + "Invalid offset/size in FAT archive in {}", + filename.display() + )); + } let bytes = &buffer[arch.offset as usize..][..arch.size as usize]; goblin::mach::MachO::parse(bytes, 0)? } @@ -62,8 +70,12 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, for segment in mach.segments.iter() { for (section, _) in &segment.sections()? { if section.name()? == "__bss" { - bss_addr = section.addr + offset; - bss_size = section.size; + if let Some(addr) = section.addr.checked_add(offset) { + if addr.checked_add(section.size).is_some() { + bss_addr = addr; + bss_size = section.size; + } + } } } } @@ -126,18 +138,62 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, // the map address is relatively small. In this case we can default to 0. let offset = offset.saturating_sub(program_header.p_vaddr); + let mut bss_addr = 0; + let mut bss_size = 0; + let mut bss_end = 0; + if let Some(addr) = bss_header.sh_addr.checked_add(offset) { + if bss_header.sh_size.checked_add(addr).is_none() { + return Err(format_err!( + "Invalid bss address/size in {}", + filename.display() + )); + } + bss_addr = addr; + bss_size = bss_header.sh_size; + bss_end = bss_header.sh_addr + bss_header.sh_size; + } + for sym in elf.syms.iter() { - let name = elf.strtab[sym.st_name].to_string(); - symbols.insert(name, sym.st_value + offset); + // Skip imported symbols + if sym.is_import() + || (bss_end != 0 + && sym.st_size != 0 + && !is_subrange(0u64, bss_end, sym.st_value, sym.st_size)) + { + continue; + } + if let Some(pos) = sym.st_value.checked_add(offset) { + if sym.is_function() && !is_subrange(addr, size, pos, sym.st_size) { + continue; + } + if let Some(name) = elf.strtab.get_unsafe(sym.st_name) { + symbols.insert(name.to_string(), pos); + } + } } for dynsym in elf.dynsyms.iter() { - let name = elf.dynstrtab[dynsym.st_name].to_string(); - symbols.insert(name, dynsym.st_value + offset); + // Skip imported symbols + if dynsym.is_import() + || (bss_end != 0 + && dynsym.st_size != 0 + && !is_subrange(0u64, bss_end, dynsym.st_value, dynsym.st_size)) + { + continue; + } + if let Some(pos) = dynsym.st_value.checked_add(offset) { + if dynsym.is_function() && !is_subrange(addr, size, pos, dynsym.st_size) { + continue; + } + if let Some(name) = elf.dynstrtab.get_unsafe(dynsym.st_name) { + symbols.insert(name.to_string(), pos); + } + } } + Ok(BinaryInfo { symbols, - bss_addr: bss_header.sh_addr + offset, - bss_size: bss_header.sh_size, + bss_addr, + bss_size, addr, size, }) @@ -146,7 +202,9 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, for export in pe.exports { if let Some(name) = export.name { if let Some(export_offset) = export.offset { - symbols.insert(name.to_string(), export_offset as u64 + offset); + if let Some(addr) = offset.checked_add(export_offset as u64) { + symbols.insert(name.to_string(), addr); + } } } } @@ -161,8 +219,14 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo, ) }) .map(|data_section| { - let bss_addr = u64::from(data_section.virtual_address) + offset; - let bss_size = u64::from(data_section.virtual_size); + let mut bss_addr = 0; + let mut bss_size = 0; + if let Some(addr) = offset.checked_add(data_section.virtual_address as u64) { + if addr.checked_add(data_section.virtual_size as u64).is_some() { + bss_addr = addr; + bss_size = u64::from(data_section.virtual_size); + } + } BinaryInfo { symbols, diff --git a/src/utils.rs b/src/utils.rs index 2ca1374f..4df08724 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,6 @@ +use num_traits::{CheckedAdd, Zero}; +use std::ops::Add; + #[cfg(feature = "unwind")] pub fn resolve_filename(filename: &str, modulename: &str) -> Option<String> { // check the filename first, if it exists use it @@ -20,3 +23,56 @@ pub fn resolve_filename(filename: &str, modulename: &str) -> Option<String> { None } + +pub fn is_subrange<T: Eq + Ord + Add + CheckedAdd + Zero>( + start: T, + size: T, + sub_start: T, + sub_size: T, +) -> bool { + !size.is_zero() + && !sub_size.is_zero() + && start.checked_add(&size).is_some() + && sub_start.checked_add(&sub_size).is_some() + && sub_start >= start + && sub_start + sub_size <= start + size +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_subrange() { + assert!(is_subrange( + 0u64, + 0xffff_ffff_ffff_ffff, + 0, + 0xffff_ffff_ffff_ffff + )); + assert!(is_subrange(0, 1, 0, 1)); + assert!(is_subrange(0, 100, 0, 10)); + assert!(is_subrange(0, 100, 90, 10)); + + assert!(!is_subrange(0, 0, 0, 0)); + assert!(!is_subrange(1, 0, 0, 0)); + assert!(!is_subrange(1, 0, 1, 0)); + assert!(!is_subrange(0, 0, 0, 1)); + assert!(!is_subrange(0, 0, 1, 0)); + assert!(!is_subrange( + 1u64, + 0xffff_ffff_ffff_ffff, + 0, + 0xffff_ffff_ffff_ffff + )); + assert!(!is_subrange( + 0u64, + 0xffff_ffff_ffff_ffff, + 1, + 0xffff_ffff_ffff_ffff + )); + assert!(!is_subrange(0, 10, 0, 11)); + assert!(!is_subrange(0, 10, 1, 10)); + assert!(!is_subrange(0, 10, 9, 2)); + } +}