From 549248b9831d75606247f4968421b158a5cde2e4 Mon Sep 17 00:00:00 2001 From: Enkelmann Date: Mon, 4 Sep 2023 12:48:21 +0200 Subject: [PATCH] refactor CWE-416 check --- .../src/checkers/cwe_416/context.rs | 4 +- .../src/checkers/cwe_416/mod.rs | 319 ++++++------------ .../src/checkers/cwe_416/state.rs | 267 ++++++++------- 3 files changed, 245 insertions(+), 345 deletions(-) diff --git a/src/cwe_checker_lib/src/checkers/cwe_416/context.rs b/src/cwe_checker_lib/src/checkers/cwe_416/context.rs index 88f668c34..1174f5d2e 100644 --- a/src/cwe_checker_lib/src/checkers/cwe_416/context.rs +++ b/src/cwe_checker_lib/src/checkers/cwe_416/context.rs @@ -76,7 +76,7 @@ impl<'a> Context<'a> { state: &mut State, call_tid: &Tid, call_params: impl IntoIterator, - ) -> Option> { + ) -> Option)>> { let mut warnings = Vec::new(); for arg in call_params { if let Some(arg_value) = self @@ -174,7 +174,7 @@ impl<'a> Context<'a> { name: &str, description: String, location: &Tid, - warning_causes: Vec<(AbstractIdentifier, Tid)>, + warning_causes: Vec<(AbstractIdentifier, Vec)>, root_function: &Tid, ) { let cwe_warning = CweWarning { diff --git a/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs b/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs index 004f191a8..f4fb13b07 100644 --- a/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs +++ b/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs @@ -48,21 +48,13 @@ //! may lead to missed CWEs in this check. //! - Pointers freed by other operations than calls to the deallocation symbols contained in the config.json will be missed by the analysis. -use std::collections::BTreeSet; -use std::collections::HashMap; -use std::collections::HashSet; - -use crate::abstract_domain::AbstractDomain; use crate::abstract_domain::AbstractIdentifier; -use crate::analysis::fixpoint::Computation; -use crate::analysis::forward_interprocedural_fixpoint::GeneralizedContext; -use crate::analysis::graph::Node; -use crate::analysis::interprocedural_fixpoint_generic::NodeValue; -use crate::analysis::pointer_inference::PointerInference; use crate::prelude::*; use crate::utils::log::CweWarning; use crate::utils::log::LogMessage; use crate::CweModule; +use std::collections::BTreeSet; +use std::collections::HashSet; /// The module name and version pub static CWE_MODULE: CweModule = CweModule { @@ -123,16 +115,13 @@ pub fn check_cwe( fixpoint_computation.compute_with_max_steps(100); - let mut warnings = HashSet::new(); + let mut warnings = BTreeSet::new(); while let Ok(warning) = cwe_warning_receiver.try_recv() { warnings.insert(warning); } - let return_site_states = collect_return_site_states(&fixpoint_computation); let cwes = generate_context_information_for_warnings( - return_site_states, warnings, config.always_include_full_path_to_free_site, - analysis_results.pointer_inference.unwrap(), ); let mut logs = BTreeSet::new(); @@ -151,18 +140,15 @@ pub struct WarningContext { cwe: CweWarning, /// The TID of the function in which the CWE warning was generated. root_function: Tid, - /// Pairs of object IDs and the sites where the object was freed. - /// If the free-site is the same function call from which the object ID originates - /// then the CWE needs to be post-processed to give more exact information about the - /// free-site inside the function call. - object_and_free_ids: Vec<(AbstractIdentifier, Tid)>, + /// Pairs of object IDs and the paths to the actual free sites. + object_and_free_ids: Vec<(AbstractIdentifier, Vec)>, } impl WarningContext { /// Generate a new warning context object. pub fn new( cwe: CweWarning, - object_and_free_ids: Vec<(AbstractIdentifier, Tid)>, + object_and_free_ids: Vec<(AbstractIdentifier, Vec)>, root_function: Tid, ) -> Self { WarningContext { @@ -173,202 +159,94 @@ impl WarningContext { } } -/// For each function call TID collect the state of the callee just before returning to the caller. -fn collect_return_site_states<'a>( - fixpoint: &Computation>>, -) -> HashMap { - let mut call_tid_to_return_state_map: HashMap = HashMap::new(); - let graph = fixpoint.get_graph(); - for node in graph.node_indices() { - let call_tid = match graph[node] { - Node::CallReturn { call, return_: _ } => call.0.term.jmps[0].tid.clone(), - _ => continue, - }; - let node_value = match fixpoint.get_node_value(node) { - Some(value) => value, - None => continue, - }; - let return_state = match node_value { - NodeValue::CallFlowCombinator { - call_stub: _, - interprocedural_flow, - } => { - if let Some(state) = interprocedural_flow { - state.clone() - } else { - continue; - } - } - _ => panic!("Unexpexted NodeValue type encountered."), - }; - // There exists one CallReturn node for each return instruction in the callee, - // so we have to merge the corresponding states here. - call_tid_to_return_state_map - .entry(call_tid) - .and_modify(|saved_return_state| { - *saved_return_state = saved_return_state.merge(&return_state) - }) - .or_insert(return_state); - } - call_tid_to_return_state_map -} - -/// If the ID of the "free"-site is the same call from which the object ID originates from -/// then (recursively) identify the real "free"-site inside the call. -/// Also return a list of call TIDs that lead to the real "free"-site. -/// -/// The function returns an error if the source object was already flagged in some of the callees. -/// In such a case the corresponding CWE warning should be removed, -/// since there already exists another CWE warning with the same root cause. +/// Shorten the path to the "free"-site so that it ends in the first call +/// that is not contained in the path to the object origin. fn get_shortended_path_to_source_of_free( object_id: &AbstractIdentifier, - free_id: &Tid, - return_site_states: &HashMap, -) -> Result<(Tid, Vec), ()> { - if let (inner_object, Some(path_hint_id)) = object_id.without_last_path_hint() { - if path_hint_id == *free_id { - if let Some(return_state) = return_site_states.get(free_id) { - if return_state.is_id_already_flagged(&inner_object) { - return Err(()); - } - if let Some(inner_free) = return_state.get_free_tid_if_dangling(&inner_object) { - let (root_free, mut callgraph_ids) = get_shortended_path_to_source_of_free( - &inner_object, - inner_free, - return_site_states, - )?; - callgraph_ids.push(path_hint_id); - return Ok((root_free, callgraph_ids)); - } - } + free_path: &[Tid], +) -> Vec { + let mut object_id = object_id.clone(); + let mut shortened_free_path = free_path.to_vec(); + while let (shortened_id, Some(last_path_hint)) = object_id.without_last_path_hint() { + if Some(&last_path_hint) == shortened_free_path.last() { + object_id = shortened_id; + shortened_free_path.pop(); + } else { + break; } } - // No inner source apart from the given free_id could be identified - Ok((free_id.clone(), Vec::new())) + // Return the free path without the shortened free path + if shortened_free_path.is_empty() { + free_path.to_vec() + } else { + free_path[(shortened_free_path.len() - 1)..].to_vec() + } } -/// Get the full path in the call-graph connecting the `object_id` to the site where it gets freed. -/// Note that there may be several paths to "free" sites in the call-graph. -/// This function returns just one (random) path to such a "free" site. -/// -/// When calling this function non-recursively, the `collectect_callgraph_ids` should be empty. -/// -/// The function returns an error if the source object was already flagged in some of the callees. -/// In such a case the corresponding CWE warning should be removed, -/// since there already exists another CWE warning with the same root cause. -fn get_full_path_to_source_of_free<'a>( +/// Get the part of the path to the "free"-site that is not shared with the path to the object origin site. +fn get_root_cause_for_returned_dangling_pointers( object_id: &AbstractIdentifier, - free_id: &Tid, - return_site_states: &HashMap, - pointer_inference: &'a PointerInference<'a>, - mut collected_callgraph_ids: Vec, -) -> Result<(Tid, Vec), ()> { - if collected_callgraph_ids.contains(free_id) { - // This path is recursive and thus not a (shortest) path to an actual `free`-site. - return Err(()); - } - // Get callee information. If unsuccessful, then the `free_id` should already be the source site. - let id_replacement_map = match pointer_inference.get_id_renaming_map_at_call_tid(free_id) { - Some(map) => map, - None => return Ok((free_id.clone(), collected_callgraph_ids)), - }; - let return_state = match return_site_states.get(free_id) { - Some(state) => state, - None => return Ok((free_id.clone(), collected_callgraph_ids)), - }; - // Check whether the free site in the callee is already flagged. - for flagged_id in return_state.get_already_flagged_objects() { - if let Some(caller_data) = id_replacement_map.get(&flagged_id) { - if caller_data.get_relative_values().contains_key(object_id) { - // A corresponding object ID was already flagged in a callee, - // so we want to suppress this CWE warning as a duplicate of the already flagged CWE in the callee. - if object_id.get_tid() != &return_state.current_fn_tid { - return Err(()); - } else { - // This is a recursive call and the object is a parameter to this call. - // We treat the call as the root cause - // to avoid erroneously suppressing some CWE warnings based on recursive calls. - return Ok((free_id.clone(), collected_callgraph_ids)); - } - } + free_path: &[Tid], +) -> Vec { + let mut object_id = object_id.clone(); + let mut shortened_free_path = free_path.to_vec(); + while let (shortened_id, Some(last_path_hint)) = object_id.without_last_path_hint() { + if Some(&last_path_hint) == shortened_free_path.last() { + object_id = shortened_id; + shortened_free_path.pop(); + } else { + break; } } - // If the object is a parameter to the callee then recursively find the real free site inside the callee - for (callee_id, free_site_in_callee) in return_state.get_dangling_objects() { - if collected_callgraph_ids.contains(&free_site_in_callee) { - // we skip potentially recursive paths - continue; - } - if let Some(caller_data) = id_replacement_map.get(&callee_id) { - if caller_data.get_relative_values().contains_key(object_id) { - collected_callgraph_ids.push(free_id.clone()); - return get_full_path_to_source_of_free( - &callee_id, - &free_site_in_callee, - return_site_states, - pointer_inference, - collected_callgraph_ids, - ); - } - } - } - // If the object originates from the same call that also frees the object, - // then use the path hints of the object ID to find the `free` site inside the callee. - if let (inner_object, Some(path_hint_id)) = object_id.without_last_path_hint() { - if path_hint_id == *free_id { - if let Some(return_state) = return_site_states.get(free_id) { - if return_state.is_id_already_flagged(&inner_object) { - return Err(()); - } - if let Some(inner_free) = return_state.get_free_tid_if_dangling(&inner_object) { - collected_callgraph_ids.push(free_id.clone()); - return get_full_path_to_source_of_free( - &inner_object, - inner_free, - return_site_states, - pointer_inference, - collected_callgraph_ids, - ); - } - } - } + if shortened_free_path.is_empty() { + vec![free_path[0].clone()] + } else { + shortened_free_path } - // The `free_id` is an internal call, but no `free` site was found inside the callee. - // In theory, this case should never happen. - // We treat it like the `free_id` is the source `free` to at least return some useful information if it happens anyway. - Ok((free_id.clone(), collected_callgraph_ids)) +} + +/// Return `true` if the object originates in the same call as the "free"-site. +fn is_case_of_returned_dangling_pointer(object_id: &AbstractIdentifier, free_path: &[Tid]) -> bool { + // This implicitly uses that the `free_path` is never empty. + object_id.get_path_hints().last() == free_path.last() } /// Generate context information for CWE warnings. /// E.g. relevant callgraph addresses are added to each CWE here. -fn generate_context_information_for_warnings<'a>( - return_site_states: HashMap, - warnings: HashSet, +fn generate_context_information_for_warnings( + warnings: BTreeSet, generate_full_paths_to_free_site: bool, - pointer_inference: &'a PointerInference<'a>, ) -> BTreeSet { let mut processed_warnings = BTreeSet::new(); + let mut root_causes_for_returned_dangling_pointers = HashSet::new(); + for mut warning in warnings { let mut context_infos = Vec::new(); - let mut relevant_callgraph_tids = Vec::new(); - for (object_id, free_id) in warning.object_and_free_ids.iter() { - let source_free_site_info = if generate_full_paths_to_free_site { - get_full_path_to_source_of_free( - object_id, - free_id, - &return_site_states, - pointer_inference, - Vec::new(), - ) - } else { - get_shortended_path_to_source_of_free(object_id, free_id, &return_site_states) - }; - if let Ok((root_free_id, mut callgraph_ids_to_free)) = source_free_site_info { - relevant_callgraph_tids.append(&mut callgraph_ids_to_free); - context_infos.push(format!( - "Accessed ID {object_id} may have been freed before at {root_free_id}." - )); + let mut relevant_callgraph_tids = BTreeSet::new(); + for (object_id, mut free_path) in warning.object_and_free_ids.into_iter() { + if is_case_of_returned_dangling_pointer(&object_id, &free_path) { + let root_cause = + get_root_cause_for_returned_dangling_pointers(&object_id, &free_path); + if root_causes_for_returned_dangling_pointers.contains(&root_cause) { + // Skip this warning root cause, since another warning with the same root cause was already generated. + // FIXME: This is a coarse heuristic to reduce false positives. + // However, it is still possible that some but not all of these cases are real bugs + // and that this heuristic chooses the wrong representative. + continue; + } else { + root_causes_for_returned_dangling_pointers.insert(root_cause); + } } + if !generate_full_paths_to_free_site { + free_path = get_shortended_path_to_source_of_free(&object_id, &free_path); + } + for id in &free_path[1..] { + relevant_callgraph_tids.insert(id.clone()); + } + context_infos.push(format!( + "Accessed ID {object_id} may have been freed before at {}.", + free_path[0] + )); } if context_infos.is_empty() { // Skip (delete) this CWE warning, @@ -402,33 +280,23 @@ pub mod tests { #[test] fn test_warning_context_generation() { - let project = Project::mock_x64(); - let pointer_inference = PointerInference::mock(&project); let id = AbstractIdentifier::new( Tid::new("object_origin_tid"), AbstractLocation::Register(variable!("RAX:8")), ); - let path_id = id.with_path_hint(Tid::new("call_tid")).unwrap(); - let object_and_free_ids = vec![(path_id, Tid::new("call_tid"))]; + let object_id = id.with_path_hint(Tid::new("call_tid")).unwrap(); + let object_and_free_ids = vec![( + object_id.clone(), + vec![Tid::new("free_tid"), Tid::new("call_tid")], + )]; let cwe = CweWarning::new("CWE416", "test", "mock_cwe"); let warning_context = WarningContext::new(cwe, object_and_free_ids, Tid::new("root_func_tid")); - let warnings = HashSet::from([warning_context]); + let warnings = BTreeSet::from([warning_context.clone()]); // Test warning context generation - let return_state = State::mock( - Tid::new("callee_tid"), - &[(id.clone(), Tid::new("free_tid"))], - &[], - ); - let return_site_states = HashMap::from([(Tid::new("call_tid"), return_state)]); - let processed_warnings = generate_context_information_for_warnings( - return_site_states, - warnings.clone(), - false, - &pointer_inference, - ); + let processed_warnings = generate_context_information_for_warnings(warnings.clone(), false); assert_eq!(processed_warnings.len(), 1); let processed_cwe = processed_warnings.iter().next().unwrap(); assert_eq!(&processed_cwe.other[0], &[ @@ -437,14 +305,21 @@ pub mod tests { ]); // Test warning filtering - let return_state = State::mock(Tid::new("callee_tid"), &[], &[id.clone()]); - let return_site_states = HashMap::from([(Tid::new("call_tid"), return_state)]); - let processed_warnings = generate_context_information_for_warnings( - return_site_states, - warnings, - false, - &pointer_inference, - ); - assert_eq!(processed_warnings.len(), 0) + let object_and_free_ids_2 = vec![( + object_id + .with_path_hint(Tid::new("outer_call_tid")) + .unwrap(), + vec![ + Tid::new("free_tid"), + Tid::new("call_tid"), + Tid::new("outer_call_tid"), + ], + )]; + let cwe_2 = CweWarning::new("CWE416", "test", "mock_cwe_2"); + let warning_context_2 = + WarningContext::new(cwe_2, object_and_free_ids_2, Tid::new("root_func_tid_2")); + let warnings = BTreeSet::from([warning_context, warning_context_2]); + let processed_warnings = generate_context_information_for_warnings(warnings, false); + assert_eq!(processed_warnings.len(), 1) } } diff --git a/src/cwe_checker_lib/src/checkers/cwe_416/state.rs b/src/cwe_checker_lib/src/checkers/cwe_416/state.rs index b03f3072d..bd35037bb 100644 --- a/src/cwe_checker_lib/src/checkers/cwe_416/state.rs +++ b/src/cwe_checker_lib/src/checkers/cwe_416/state.rs @@ -10,23 +10,44 @@ use std::collections::BTreeMap; #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] enum ObjectState { /// The object is already freed, i.e. pointers to it are dangling. - /// The associated TID denotes the point in time when the object was freed. - Dangling(Tid), + /// The associated TIDs denote the point in time when the object was freed + /// and possibly the call path taken to that point in time. + Dangling(Vec), /// The object is already freed and a use-after-free CWE message for it was already generated. /// This object state is used to prevent duplicate CWE warnings with the same root cause. - AlreadyFlagged, + /// It still holds a path to a point in time where the object was freed. + AlreadyFlagged(Vec), } impl AbstractDomain for ObjectState { /// Merge two object states. - /// If both object states are dangling then use the source TID of `self` in the result. + /// If both object states are identical then use the shorter path to `free` in the result. fn merge(&self, other: &Self) -> Self { + use std::cmp::Ordering; + match (self, other) { - (ObjectState::AlreadyFlagged, _) | (_, ObjectState::AlreadyFlagged) => { - ObjectState::AlreadyFlagged + ( + ObjectState::AlreadyFlagged(free_path), + ObjectState::AlreadyFlagged(other_free_path), + ) => { + let shortest_path = match free_path.len().cmp(&other_free_path.len()) { + Ordering::Less => free_path.clone(), + Ordering::Equal => std::cmp::min(free_path, other_free_path).clone(), + Ordering::Greater => other_free_path.clone(), + }; + ObjectState::AlreadyFlagged(shortest_path) + } + (ObjectState::AlreadyFlagged(free_path), _) + | (_, ObjectState::AlreadyFlagged(free_path)) => { + ObjectState::AlreadyFlagged(free_path.clone()) } - (ObjectState::Dangling(tid), ObjectState::Dangling(other_tid)) => { - ObjectState::Dangling(std::cmp::min(tid, other_tid).clone()) + (ObjectState::Dangling(free_path), ObjectState::Dangling(other_free_path)) => { + let shortest_path = match free_path.len().cmp(&other_free_path.len()) { + Ordering::Less => free_path.clone(), + Ordering::Equal => std::cmp::min(free_path, other_free_path).clone(), + Ordering::Greater => other_free_path.clone(), + }; + ObjectState::Dangling(shortest_path) } } } @@ -37,12 +58,19 @@ impl AbstractDomain for ObjectState { } } -/// The `State` currently only keeps track of the list of TIDs of memory object that may have been freed already +/// The `State` keeps track of the list of abstract IDs of memory objects that may have been freed already /// together with the corresponding object states. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] pub struct State { + /// The TID of the current function. pub current_fn_tid: Tid, + /// Map from the abstract ID of dangling objects to their object state. dangling_objects: DomainMap, + /// Memory objects that were generated and freed in the same call are tracked in a separate map. + /// Such objects are often analysis errors. + /// Tracking them separately prevents them from masking genuine Use-After-Free cases in the caller. + dangling_objects_generated_and_freed_in_same_call: + DomainMap, } impl State { @@ -51,64 +79,37 @@ impl State { State { current_fn_tid, dangling_objects: BTreeMap::new().into(), + dangling_objects_generated_and_freed_in_same_call: BTreeMap::new().into(), } } - /// Return whether the given object ID is already flagged in this state, - /// i.e. whether a CWE warning was already generated for this object. - pub fn is_id_already_flagged(&self, object_id: &AbstractIdentifier) -> bool { - self.dangling_objects.get(object_id) == Some(&ObjectState::AlreadyFlagged) - } - - /// If the given `object_id` represents a dangling object, return the TID of the site where it was freed. - pub fn get_free_tid_if_dangling(&self, object_id: &AbstractIdentifier) -> Option<&Tid> { - if let Some(ObjectState::Dangling(free_tid)) = self.dangling_objects.get(object_id) { - Some(free_tid) - } else { - None - } - } - - /// Return a list of abstract identifiers that are marked as "flagged" in the current state, - /// i.e. they already triggered the generation of a CWE warning. - pub fn get_already_flagged_objects(&self) -> Vec { - self.dangling_objects - .iter() - .filter_map(|(id, object_state)| match object_state { - ObjectState::AlreadyFlagged => Some(id.clone()), - _ => None, - }) - .collect() - } - - /// Return a list of abstract identifiers that are marked as "dangling" in the current state - /// together with the TIDs of the corresponding `free` instruction. - pub fn get_dangling_objects(&self) -> Vec<(AbstractIdentifier, Tid)> { - self.dangling_objects - .iter() - .filter_map(|(id, object_state)| match object_state { - ObjectState::AlreadyFlagged => None, - ObjectState::Dangling(free_id) => Some((id.clone(), free_id.clone())), - }) - .collect() - } - /// Check the given address on whether it may point to already freed memory. /// For each possible dangling pointer target the abstract ID of the object - /// and the TID of the corresponding site where the object was freed is returned. + /// and the path to the corresponding site where the object was freed is returned. /// The object states of corresponding memory objects are set to [`ObjectState::AlreadyFlagged`] /// to prevent reporting duplicate CWE messages with the same root cause. pub fn check_address_for_use_after_free( &mut self, address: &Data, - ) -> Option> { + ) -> Option)>> { let mut free_ids_of_dangling_pointers = Vec::new(); for id in address.get_relative_values().keys() { - if let Some(ObjectState::Dangling(free_id)) = self.dangling_objects.get(id) { - free_ids_of_dangling_pointers.push((id.clone(), free_id.clone())); + if let Some(ObjectState::Dangling(free_id_path)) = self.dangling_objects.get(id) { + let free_id_path = free_id_path.clone(); + free_ids_of_dangling_pointers.push((id.clone(), free_id_path.clone())); self.dangling_objects - .insert(id.clone(), ObjectState::AlreadyFlagged); + .insert(id.clone(), ObjectState::AlreadyFlagged(free_id_path)); + } + if let Some(ObjectState::Dangling(free_id_path)) = self + .dangling_objects_generated_and_freed_in_same_call + .get(id) + { + let free_id_path = free_id_path.clone(); + free_ids_of_dangling_pointers.push((id.clone(), free_id_path.clone())); + + self.dangling_objects_generated_and_freed_in_same_call + .insert(id.clone(), ObjectState::AlreadyFlagged(free_id_path)); } } if free_ids_of_dangling_pointers.is_empty() { @@ -118,6 +119,44 @@ impl State { } } + /// Mark the given object ID as freed with the given `free_id_path` denoting the path to the site where it is freed. + /// + /// If the object ID was already marked as dangling, + /// return it plus the (previously saved) path to the site where it was freed. + #[must_use] + fn mark_as_freed( + &mut self, + object_id: &AbstractIdentifier, + free_id_path: Vec, + pi_state: &PiState, + ) -> Option<(AbstractIdentifier, Vec)> { + if pi_state.memory.is_unique_object(object_id).ok() == Some(false) { + // FIXME: We cannot distinguish different objects represented by the same ID. + // So to avoid producing lots of false positive warnings + // we ignore these cases by not marking these IDs as freed. + return None; + } + if object_id.get_path_hints().last() == free_id_path.last() { + // The object was created in the same call as it is now freed. + if let Some(ObjectState::Dangling(old_free_id_path)) = self + .dangling_objects_generated_and_freed_in_same_call + .insert( + object_id.clone(), + ObjectState::Dangling(free_id_path.clone()), + ) + { + return Some((object_id.clone(), old_free_id_path.clone())); + } + } else if let Some(ObjectState::Dangling(old_free_id_path)) = self.dangling_objects.insert( + object_id.clone(), + ObjectState::Dangling(free_id_path.clone()), + ) { + return Some((object_id.clone(), old_free_id_path.clone())); + } + + None + } + /// All TIDs that the given `param` may point to are marked as freed, i.e. pointers to them are dangling. /// For each ID that was already marked as dangling return a string describing the root cause of a possible double free bug. pub fn handle_param_of_free_call( @@ -125,22 +164,13 @@ impl State { call_tid: &Tid, param: &Data, pi_state: &PiState, - ) -> Option> { + ) -> Option)>> { // FIXME: This function could also generate debug log messages whenever nonsensical information is detected. // E.g. stack frame IDs or non-zero ID offsets can be indicators of other bugs. let mut warnings = Vec::new(); for id in param.get_relative_values().keys() { - if pi_state.memory.is_unique_object(id).ok() == Some(false) { - // FIXME: We cannot distinguish different objects represented by the same ID. - // So to avoid producing lots of false positive warnings - // we ignore these cases by not marking these IDs as freed. - continue; - } - if let Some(ObjectState::Dangling(old_free_id)) = self - .dangling_objects - .insert(id.clone(), ObjectState::Dangling(call_tid.clone())) - { - warnings.push((id.clone(), old_free_id.clone())); + if let Some(warning_data) = self.mark_as_freed(id, vec![call_tid.clone()], pi_state) { + warnings.push(warning_data); } } if !warnings.is_empty() { @@ -151,8 +181,7 @@ impl State { } /// Add objects that were freed in the callee of a function call to the list of dangling pointers of `self`. - /// May return a list of warnings if cases of possible double frees are detected, - /// i.e. if an already freed object may also have been freed in the callee. + /// Note that this function does not check for double frees. pub fn collect_freed_objects_from_called_function( &mut self, state_before_return: &State, @@ -163,23 +192,15 @@ impl State { for (callee_id, callee_object_state) in state_before_return.dangling_objects.iter() { if let Some(caller_value) = id_replacement_map.get(callee_id) { for caller_id in caller_value.get_relative_values().keys() { - if pi_state.memory.is_unique_object(caller_id).ok() != Some(false) { - // FIXME: We cannot distinguish different objects represented by the same ID. - // So to avoid producing lots of false positive warnings we ignore these cases. - match (callee_object_state, self.dangling_objects.get(caller_id)) { - // Case 1: The dangling object is unknown to the caller, so we add it. - (ObjectState::Dangling(_), None) - | (ObjectState::AlreadyFlagged, None) => { - self.dangling_objects.insert( - caller_id.clone(), - ObjectState::Dangling(call_tid.clone()), - ); - } - // Case 2: The dangling object is already known to the caller. - // If this were a case of Use-After-Free, then this should have been flagged when checking the call parameters. - // Thus we can simply leave the object state as it is. - (_, Some(ObjectState::Dangling(_))) - | (_, Some(&ObjectState::AlreadyFlagged)) => (), + match callee_object_state { + ObjectState::Dangling(callee_free_path) + | ObjectState::AlreadyFlagged(callee_free_path) => { + let mut free_id_path = callee_free_path.clone(); + free_id_path.push(call_tid.clone()); + // FIXME: If the object was not created in the callee and it is also marked as flagged in the callee + // then one could interpret accesses in the caller as duplicates and mark the object ID as already flagged. + // But analysis errors in the callee could mask real Use-After-Frees in the caller if we do that. + let _ = self.mark_as_freed(caller_id, free_id_path, pi_state); } } } @@ -194,6 +215,9 @@ impl AbstractDomain for State { State { current_fn_tid: self.current_fn_tid.clone(), dangling_objects: self.dangling_objects.merge(&other.dangling_objects), + dangling_objects_generated_and_freed_in_same_call: self + .dangling_objects_generated_and_freed_in_same_call + .merge(&other.dangling_objects_generated_and_freed_in_same_call), } } @@ -209,23 +233,45 @@ impl State { #[allow(dead_code)] pub fn to_json_compact(&self) -> serde_json::Value { use serde_json::*; + let format_vec = |vec| { + let mut string = String::new(); + for elem in vec { + string += &format!("{},", elem); + } + string + }; + let mut state_map = Map::new(); state_map.insert( "current_function".to_string(), Value::String(format!("{}", self.current_fn_tid)), ); for (id, object_state) in self.dangling_objects.iter() { - if let ObjectState::Dangling(free_tid) = object_state { - state_map.insert( + match object_state { + ObjectState::Dangling(free_path) => state_map.insert( format!("{id}"), - Value::String(format!("Dangling({free_tid})")), - ); - } else { - state_map.insert( + Value::String(format!("Dangling([{}])", format_vec(free_path))), + ), + ObjectState::AlreadyFlagged(free_path) => state_map.insert( format!("{id}"), - Value::String("Already flagged".to_string()), - ); - } + Value::String(format!("Already flagged([{}])", format_vec(free_path))), + ), + }; + } + for (id, object_state) in self + .dangling_objects_generated_and_freed_in_same_call + .iter() + { + match object_state { + ObjectState::Dangling(free_path) => state_map.insert( + format!("{id} (already dangling in callee)"), + Value::String(format!("Dangling([{}])", format_vec(free_path))), + ), + ObjectState::AlreadyFlagged(free_path) => state_map.insert( + format!("{id} (already dangling in callee)"), + Value::String(format!("Already flagged([{}])", format_vec(free_path))), + ), + }; } Value::Object(state_map) } @@ -237,37 +283,16 @@ pub mod tests { use crate::{bitvec, intermediate_representation::parsing, variable}; use std::collections::BTreeSet; - impl State { - pub fn mock( - current_fn_tid: Tid, - dangling_ids: &[(AbstractIdentifier, Tid)], - already_flagged_ids: &[AbstractIdentifier], - ) -> Self { - let mut state = State::new(current_fn_tid); - for (id, free_id) in dangling_ids.iter() { - state - .dangling_objects - .insert(id.clone(), ObjectState::Dangling(free_id.clone())); - } - for id in already_flagged_ids.iter() { - state - .dangling_objects - .insert(id.clone(), ObjectState::AlreadyFlagged); - } - state - } - } - #[test] fn test_check_address_for_use_after_free() { let mut state = State::new(Tid::new("current_fn")); state.dangling_objects.insert( AbstractIdentifier::mock("obj_id", "RAX", 8), - ObjectState::Dangling(Tid::new("free_call")), + ObjectState::Dangling(vec![Tid::new("free_call")]), ); state.dangling_objects.insert( AbstractIdentifier::mock("flagged_obj_id", "RAX", 8), - ObjectState::AlreadyFlagged, + ObjectState::AlreadyFlagged(vec![Tid::new("free_call")]), ); let address = Data::mock_from_target_map(BTreeMap::from([ ( @@ -293,14 +318,14 @@ pub mod tests { .dangling_objects .get(&AbstractIdentifier::mock("obj_id", "RAX", 8)) .unwrap(), - ObjectState::AlreadyFlagged + ObjectState::AlreadyFlagged(vec![Tid::new("free_call")]) ); assert_eq!( *state .dangling_objects .get(&AbstractIdentifier::mock("flagged_obj_id", "RAX", 8)) .unwrap(), - ObjectState::AlreadyFlagged + ObjectState::AlreadyFlagged(vec![Tid::new("free_call")]) ); } @@ -321,7 +346,7 @@ pub mod tests { .dangling_objects .get(&AbstractIdentifier::mock("obj_id", "RAX", 8)) .unwrap(), - ObjectState::Dangling(Tid::new("free_call")) + ObjectState::Dangling(vec![Tid::new("free_call")]) ); // Check that a second free operation yields a double free warning. assert!(state @@ -335,7 +360,7 @@ pub mod tests { let mut state_before_return = State::new(Tid::new("callee_fn_tid")); state_before_return.dangling_objects.insert( AbstractIdentifier::mock("callee_obj_tid", "RAX", 8), - ObjectState::Dangling(Tid::new("free_tid")), + ObjectState::Dangling(vec![Tid::new("free_tid")]), ); let pi_state = PiState::new(&variable!("RSP:8"), Tid::new("call"), BTreeSet::new()); let id_replacement_map = BTreeMap::from([( @@ -358,7 +383,7 @@ pub mod tests { .dangling_objects .get(&AbstractIdentifier::mock("caller_tid", "RBX", 8)) .unwrap(), - &ObjectState::Dangling(Tid::new("call_tid")) + &ObjectState::Dangling(vec![Tid::new("free_tid"), Tid::new("call_tid")]) ); } }