diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bfc635a0..020ad3436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff +- binja: add support for symtab names #1504 @xusheng6 ### Breaking Changes @@ -15,6 +16,9 @@ ### Bug Fixes - ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff +- binja: improve function call site detection @xusheng6 +- binja: use binaryninja.load to open files @xusheng6 +- binja: bump binja version to 3.5 #1789 @xusheng6 ### capa explorer IDA Pro plugin diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d46451e77..034b1636a 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -125,15 +125,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre """ for sym_name in bv.symbols: for sym in bv.symbols[sym_name]: - if sym.type == SymbolType.LibraryFunctionSymbol: - name = sym.short_name - yield FunctionName(name), sym.address - if name.startswith("_"): - # some linkers may prefix linked routines with a `_` to avoid name collisions. - # extract features for both the mangled and un-mangled representations. - # e.g. `_fwrite` -> `fwrite` - # see: https://stackoverflow.com/a/2628384/87207 - yield FunctionName(name[1:]), sym.address + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index a502a5f44..520de0b3f 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,8 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Tuple, Iterator -from binaryninja import Function, BinaryView, LowLevelILOperation +from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation +from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops @@ -23,13 +24,27 @@ def extract_function_calls_to(fh: FunctionHandle): # Everything that is a code reference to the current function is considered a caller, which actually includes # many other references that are NOT a caller. For example, an instruction `push function_start` will also be # considered a caller to the function - if caller.llil is not None and caller.llil.operation in [ + llil = caller.llil + if (llil is None) or llil.operation not in [ LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST, LowLevelILOperation.LLIL_JUMP, LowLevelILOperation.LLIL_TAILCALL, ]: - yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) + continue + + if llil.dest.value.type not in [ + RegisterValueType.ImportedAddressValue, + RegisterValueType.ConstantValue, + RegisterValueType.ConstantPointerValue, + ]: + continue + + address = llil.dest.value.value + if address != func.start: + continue + + yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) def extract_function_loop(fh: FunctionHandle): @@ -59,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_name(fh: FunctionHandle): + """extract function names (e.g., symtab names)""" + func: Function = fh.inner + bv: BinaryView = func.view + if bv is None: + return + + for sym in bv.get_symbols(func.start): + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address + + def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) +FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 3144fd15a..f2b8fefc2 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) candidate_addrs.append(stub_addr) for address in candidate_addrs: - sym = func.view.get_symbol_at(address) - if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]: - continue - - sym_name = sym.short_name - - lib_name = "" - import_lib = bv.lookup_imported_object_library(sym.address) - if import_lib is not None: - lib_name = import_lib[0].name - if lib_name.endswith(".dll"): - lib_name = lib_name[:-4] - elif lib_name.endswith(".so"): - lib_name = lib_name[:-3] - - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): - yield API(name), ih.address - - if sym_name.startswith("_"): - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + for sym in func.view.get_symbols(address): + if sym is None or sym.type not in [ + SymbolType.ImportAddressSymbol, + SymbolType.ImportedFunctionSymbol, + SymbolType.FunctionSymbol, + ]: + continue + + sym_name = sym.short_name + + lib_name = "" + import_lib = bv.lookup_imported_object_library(sym.address) + if import_lib is not None: + lib_name = import_lib[0].name + if lib_name.endswith(".dll"): + lib_name = lib_name[:-4] + elif lib_name.endswith(".so"): + lib_name = lib_name[:-3] + + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): yield API(name), ih.address + if sym_name.startswith("_"): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + yield API(name), ih.address + def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle diff --git a/capa/main.py b/capa/main.py index b91ca1e26..ae8421560 100644 --- a/capa/main.py +++ b/capa/main.py @@ -558,7 +558,8 @@ def get_extractor( sys.path.append(str(bn_api)) try: - from binaryninja import BinaryView, BinaryViewType + import binaryninja + from binaryninja import BinaryView except ImportError: raise RuntimeError( "Cannot import binaryninja module. Please install the Binary Ninja Python API first: " @@ -568,7 +569,7 @@ def get_extractor( import capa.features.extractors.binja.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - bv: BinaryView = BinaryViewType.get_view_of_file(str(path)) + bv: BinaryView = binaryninja.load(str(path)) if bv is None: raise RuntimeError(f"Binary Ninja cannot open file {path}") diff --git a/pyproject.toml b/pyproject.toml index ebf653d3c..46d09aa66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.286", + "ruff==0.0.291", "black==23.7.0", "isort==5.12.0", "mypy==1.5.1", diff --git a/rules b/rules index eba332e70..b9c2bc120 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit eba332e702d88927b5816770a9853dd0b3fbc47a +Subproject commit b9c2bc120e21154fd7e3e1d8b7150f8de92b1a50 diff --git a/tests/data b/tests/data index faf741a53..87bd888e1 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit faf741a538224f52d4412468f910d52a70911662 +Subproject commit 87bd888e1984a1e9f9ab8e63b8707794392f3156 diff --git a/tests/fixtures.py b/tests/fixtures.py index a8a930b34..230fa8032 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path): @lru_cache(maxsize=1) def get_binja_extractor(path: Path): - from binaryninja import Settings, BinaryViewType + import binaryninja + from binaryninja import Settings import capa.features.extractors.binja.extractor @@ -168,7 +169,7 @@ def get_binja_extractor(path: Path): if path.name.endswith("kernel32-64.dll_"): old_pdb = settings.get_bool("pdb.loadGlobalSymbols") settings.set_bool("pdb.loadGlobalSymbols", False) - bv = BinaryViewType.get_view_of_file(str(path)) + bv = binaryninja.load(str(path)) if path.name.endswith("kernel32-64.dll_"): settings.set_bool("pdb.loadGlobalSymbols", old_pdb) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index a2f0cd78f..3d51886d4 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -36,7 +36,7 @@ @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): @@ -69,4 +69,4 @@ def test_standalone_binja_backend(): @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") def test_binja_version(): version = binaryninja.core_version_info() - assert version.major == 3 and version.minor == 4 + assert version.major == 3 and version.minor == 5