Skip to content

Commit

Permalink
Merge pull request #470 from vobst/issue_469
Browse files Browse the repository at this point in the history
Issue 469: Semantically invalid IR statements due to bad Subregister Substitution
  • Loading branch information
vobst authored Jun 18, 2024
2 parents 11de213 + 9c6cc26 commit 65efd5f
Show file tree
Hide file tree
Showing 13 changed files with 336 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
0.9-dev
===

- Fixed an issue in the pcode to IR translation (PR #470)
- Added initial benchmarking infrastructure (PR #464)
- Improve Control Flow Propagation normalization pass (PR #462)
- Improve taint analysis abstraction to simplify interprocedural bottom-up
Expand Down
73 changes: 69 additions & 4 deletions src/caller/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,47 @@ extern crate cwe_checker_lib; // Needed for the docstring-link to work

use anyhow::Context;
use anyhow::Error;
use clap::Parser;
use clap::{Parser, ValueEnum};

use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::pipeline::{disassemble_binary, AnalysisResults};
use cwe_checker_lib::utils::binary::BareMetalConfig;
use cwe_checker_lib::utils::debug;
use cwe_checker_lib::utils::log::{print_all_messages, LogLevel};
use cwe_checker_lib::utils::read_config_file;

use std::collections::{BTreeSet, HashSet};
use std::convert::From;
use std::path::PathBuf;

#[derive(ValueEnum, Clone, Debug, Copy)]
/// Selects which kind of debug output is displayed.
pub enum CliDebugMode {
/// Result of the Pointer Inference computation.
Pi,
/// Unnormalized IR form of the program.
IrRaw,
/// Normalized IR form of the program.
IrNorm,
/// Optimized IR form of the program.
IrOpt,
/// Output of the Ghidra plugin.
PcodeRaw,
}

impl From<&CliDebugMode> for debug::Stage {
fn from(mode: &CliDebugMode) -> Self {
use CliDebugMode::*;
match mode {
Pi => debug::Stage::Pi,
IrRaw => debug::Stage::Ir(debug::IrForm::Raw),
IrNorm => debug::Stage::Ir(debug::IrForm::Normalized),
IrOpt => debug::Stage::Ir(debug::IrForm::Optimized),
PcodeRaw => debug::Stage::Pcode(debug::PcodeForm::Raw),
}
}
}

#[derive(Debug, Parser)]
#[command(version, about)]
/// Find vulnerable patterns in binary executables
Expand Down Expand Up @@ -67,7 +99,39 @@ struct CmdlineArgs {
/// Output for debugging purposes.
/// The current behavior of this flag is unstable and subject to change.
#[arg(long, hide(true))]
debug: bool,
debug: Option<CliDebugMode>,

/// Read the saved output of the Pcode Extractor plugin from a file instead
/// of invoking Ghidra.
#[arg(long, hide(true))]
pcode_raw: Option<String>,
}

impl From<&CmdlineArgs> for debug::Settings {
fn from(args: &CmdlineArgs) -> Self {
let stage = match &args.debug {
None => debug::Stage::default(),
Some(mode) => mode.into(),
};
let verbosity = if args.verbose {
debug::Verbosity::Verbose
} else if args.quiet {
debug::Verbosity::Quiet
} else {
debug::Verbosity::default()
};

let mut builder = debug::SettingsBuilder::default()
.set_stage(stage)
.set_verbosity(verbosity)
.set_termination_policy(debug::TerminationPolicy::EarlyExit);

if let Some(pcode_raw) = &args.pcode_raw {
builder = builder.set_saved_pcode_raw(PathBuf::from(pcode_raw.clone()));
}

builder.build()
}
}

fn main() -> Result<(), Error> {
Expand All @@ -90,6 +154,7 @@ fn check_file_existence(file_path: &str) -> Result<String, String> {

/// Run the cwe_checker with Ghidra as its backend.
fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let debug_settings = args.into();
let mut modules = cwe_checker_lib::get_modules();
if args.module_versions {
// Only print the module versions and then quit.
Expand All @@ -111,7 +176,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());

let (binary, project, mut all_logs) =
disassemble_binary(&binary_file_path, bare_metal_config_opt, args.verbose)?;
disassemble_binary(&binary_file_path, bare_metal_config_opt, &debug_settings)?;

// Filter the modules to be executed.
if let Some(ref partial_module_list) = args.partial {
Expand Down Expand Up @@ -186,7 +251,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
// Print debug and then return.
// Right now there is only one debug printing function.
// When more debug printing modes exist, this behaviour will change!
if args.debug {
if debug_settings.should_debug(debug::Stage::Pi) {
cwe_checker_lib::analysis::pointer_inference::run(
&analysis_results,
serde_json::from_value(config["Memory"].clone()).unwrap(),
Expand Down
6 changes: 6 additions & 0 deletions src/cwe_checker_lib/src/abstract_domain/data/arithmetics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {

/// extract a sub-bitvector
fn subpiece(&self, low_byte: ByteSize, size: ByteSize) -> Self {
// Extracting zero-sized subpieces is an semantically invalid operation.
debug_assert_ne!(
size,
ByteSize::new(0),
"Attempt to extract zero-sized subpiece."
);
if low_byte == ByteSize::new(0) && size == self.bytesize() {
// The operation is a no-op
self.clone()
Expand Down
5 changes: 4 additions & 1 deletion src/cwe_checker_lib/src/abstract_domain/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,10 @@ impl IntervalDomain {
interval,
widening_lower_bound: lower_bound,
widening_upper_bound: upper_bound,
widening_delay: self.widening_delay >> low_byte.as_bit_length(),
widening_delay: self
.widening_delay
.overflowing_shr(low_byte.as_bit_length() as u32)
.0,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ fn mark_values_in_caller_global_mem_as_potentially_overwritten(
} else {
caller_global_mem_region.mark_interval_values_as_top(
*index as i64,
std::i64::MAX - 1,
i64::MAX - 1,
ByteSize::new(1),
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ impl State {
self.store_value(
&Data::from_target(
parent_id,
Bitvector::from_u64(address + offset as u64)
Bitvector::from_u64(address.wrapping_add(offset as u64))
.into_resize_signed(self.stack_id.bytesize())
.into(),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,22 @@ use crate::{expr, intermediate_representation::*, variable};

#[cfg(test)]
impl Expression {
/// Shortcut for creating a cast expression
/// Shortcut for creating a cast expression.
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
pub fn cast_to_size(self, op: CastOpType, result_size: ByteSize) -> Expression {
Expression::Cast {
op,
size: ByteSize::new(8),
size: result_size,
arg: Box::new(self),
}
}

/// Shortcut for creating a cast expression with target size 8.
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
self.cast_to_size(op, ByteSize::new(8))
}

/// Shortcut for creating a subpiece expression
#[cfg(test)]
pub fn subpiece(self, low_byte: ByteSize, size: ByteSize) -> Expression {
Expand Down
59 changes: 51 additions & 8 deletions src/cwe_checker_lib/src/pcode/subregister_substitution/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,23 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
fn replace_output_subregister(&mut self, def: Term<Def>) {
match &def.term {
Def::Assign { var, value } => {
// At this point, the code should be in a form where variables
// being assigned and values that are assigned are of the same
// size.
debug_assert_eq!(
var.size,
value.bytesize(),
"Encountered invalid assignment: variable {} has size {} vs. value {} has size {}.",
var,
var.size,
value,
value.bytesize()
);
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
// Check if the variable being assigned is a subregister.
if is_subregister_assignment(var, base_register) {
// The register is not a base register and should be replaced.
if self.is_next_def_cast_to_base_register(var) {
let mut output = self.input_iter.next().unwrap().clone();
Expand All @@ -128,14 +143,13 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
self.output_defs.push(output);
return;
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
let output_var: Variable = base_register.into();
let register = into_subregister(register, var);
let output_expression =
piece_base_register_assignment_expression_together(
value,
base_register,
register,
&register,
);
self.output_defs.push(Term {
tid: def.tid.clone(),
Expand All @@ -151,7 +165,9 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
}
Def::Load { var, address } => {
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
if is_subregister_assignment(var, base_register) {
// The register is not a base register and should be replaced.
// We need two replacement defs: One is a load into a temporary register
// and the second is a cast to the base register.
Expand All @@ -176,16 +192,15 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
}
self.output_defs.push(cast_to_base_def);
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
let register = into_subregister(register, var);
self.output_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_cast_to_base"),
term: Def::Assign {
var: base_register.into(),
value: piece_base_register_assignment_expression_together(
&Expression::Var(temp_reg),
base_register,
register,
&register,
),
},
});
Expand Down Expand Up @@ -359,5 +374,33 @@ fn piece_base_register_assignment_expression_together(
}
}

/// Helper to check wheter an assignment to `var` is a subregister assignment.
///
/// Assumes that `var` is held in a register and that `base_register` is the
/// corresponding full-size register.
fn is_subregister_assignment(var: &Variable, base_register: &RegisterProperties) -> bool {
var.name != base_register.register || var.size < base_register.size
}

/// Helper to create a subregister based on a variable.
///
/// Assumes that `var` is held in a register whose full-size version is equal
/// to, or a superregister of, `register`.
fn into_subregister(register: &RegisterProperties, var: &Variable) -> RegisterProperties {
// Background: If a subregister being assigned has the same
// name as the full-size register (but a different size), `register`
// will be the full register at this point. Thus, we correct the
// size manually and create a register with the full-size name but a
// smaller size. This should be a no-op if `register` is already the
// correct subregister.
// FIXME: This is incomplete if the LSB of the subregister and the full
// register are different. Until now this has not been observed as in those
// cases the register names are different.
let mut register = register.clone();
register.size = var.size;

register
}

#[cfg(test)]
mod tests;
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ fn piecing_or_zero_extending() {
tid: Tid::new("zext_ah_to_eax"),
term: Def::Assign {
var: variable!("EAX:4"),
value: Expression::cast(expr!("AH:1"), CastOpType::IntZExt),
value: Expression::cast_to_size(expr!("AH:1"), CastOpType::IntZExt, ByteSize::new(4)),
},
};
let zext_ah_to_rax = Term {
Expand Down
1 change: 0 additions & 1 deletion src/cwe_checker_lib/src/pcode/term.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::{BTreeSet, HashMap};
use std::usize;

use super::subregister_substitution::replace_input_subregister;
use super::{Expression, ExpressionType, RegisterProperties, Variable};
Expand Down
18 changes: 15 additions & 3 deletions src/cwe_checker_lib/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub use results::AnalysisResults;

use crate::intermediate_representation::{Project, RuntimeMemoryImage};
use crate::prelude::*;
use crate::utils::debug;
use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
Expand All @@ -17,18 +18,29 @@ use std::path::Path;
pub fn disassemble_binary(
binary_file_path: &Path,
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
debug_settings: &debug::Settings,
) -> Result<(Vec<u8>, Project, Vec<LogMessage>), Error> {
let binary: Vec<u8> =
std::fs::read(binary_file_path).context("Could not read from binary file path {}")?;
let (mut project, mut all_logs) = get_project_from_ghidra(
binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
verbose_flag,
debug_settings,
)?;

// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
debug_settings.print(&project.program.term, debug::Stage::Ir(debug::IrForm::Raw));
all_logs.append(&mut project.normalize_basic());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Normalized),
);
all_logs.append(&mut project.normalize_optimize());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Optimized),
);

// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
Expand Down
Loading

0 comments on commit 65efd5f

Please sign in to comment.