From 6bae9d757d3f7e31f9be1ec5f11fe9ba2b97b04f Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 13 Sep 2023 14:46:47 +0000 Subject: [PATCH 1/9] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index eba332e70..b9c2bc120 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit eba332e702d88927b5816770a9853dd0b3fbc47a +Subproject commit b9c2bc120e21154fd7e3e1d8b7150f8de92b1a50 From d04ae5294eaa8e8f394b8e507817609b02bd7867 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 13 Sep 2023 14:50:29 +0000 Subject: [PATCH 2/9] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index faf741a53..9fa44d4eb 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit faf741a538224f52d4412468f910d52a70911662 +Subproject commit 9fa44d4ebcfac59b272511deedeb7f0ad104108c From fd3678904a90585c2b881978577e37f7dab7f839 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:36:44 +0000 Subject: [PATCH 3/9] build(deps-dev): bump ruff from 0.0.286 to 0.0.290 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.0.286 to 0.0.290. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.286...v0.0.290) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 41d5cc623..3fb6be347 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.286", + "ruff==0.0.290", "black==23.7.0", "isort==5.11.4", "mypy==1.5.1", From 59d03b3ba3b821dd08e71c4bd0539d16cc5121d1 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 20 Sep 2023 20:56:20 +0800 Subject: [PATCH 4/9] binja: bump Binary Ninja version to 3.5 --- tests/test_binja_features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index a2f0cd78f..fdb7ff88b 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -69,4 +69,4 @@ def test_standalone_binja_backend(): @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") def test_binja_version(): version = binaryninja.core_version_info() - assert version.major == 3 and version.minor == 4 + assert version.major == 3 and version.minor == 5 From bc71c941718e2b541064df7aafb770c695f96988 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 20 Sep 2023 20:56:36 +0800 Subject: [PATCH 5/9] binja: use binaryninja.load to open a binary --- capa/main.py | 5 +++-- tests/fixtures.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/capa/main.py b/capa/main.py index b91ca1e26..ae8421560 100644 --- a/capa/main.py +++ b/capa/main.py @@ -558,7 +558,8 @@ def get_extractor( sys.path.append(str(bn_api)) try: - from binaryninja import BinaryView, BinaryViewType + import binaryninja + from binaryninja import BinaryView except ImportError: raise RuntimeError( "Cannot import binaryninja module. Please install the Binary Ninja Python API first: " @@ -568,7 +569,7 @@ def get_extractor( import capa.features.extractors.binja.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - bv: BinaryView = BinaryViewType.get_view_of_file(str(path)) + bv: BinaryView = binaryninja.load(str(path)) if bv is None: raise RuntimeError(f"Binary Ninja cannot open file {path}") diff --git a/tests/fixtures.py b/tests/fixtures.py index a8a930b34..230fa8032 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path): @lru_cache(maxsize=1) def get_binja_extractor(path: Path): - from binaryninja import Settings, BinaryViewType + import binaryninja + from binaryninja import Settings import capa.features.extractors.binja.extractor @@ -168,7 +169,7 @@ def get_binja_extractor(path: Path): if path.name.endswith("kernel32-64.dll_"): old_pdb = settings.get_bool("pdb.loadGlobalSymbols") settings.set_bool("pdb.loadGlobalSymbols", False) - bv = BinaryViewType.get_view_of_file(str(path)) + bv = binaryninja.load(str(path)) if path.name.endswith("kernel32-64.dll_"): settings.set_bool("pdb.loadGlobalSymbols", old_pdb) From b3dccb3841400ea11907c23a6cb70734bb55f4a1 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Wed, 20 Sep 2023 20:57:19 +0800 Subject: [PATCH 6/9] binja: improve function call site detection --- CHANGELOG.md | 3 +++ capa/features/extractors/binja/function.py | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bfc635a0..fc0d9a50e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ ### Bug Fixes - ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff +- binja: improve function call site detection @xusheng6 +- binja: use binaryninja.load to open files @xusheng6 +- binja: bump binja version to 3.5 #1789 @xusheng6 ### capa explorer IDA Pro plugin diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index a502a5f44..d2e67aa3a 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Tuple, Iterator -from binaryninja import Function, BinaryView, LowLevelILOperation +from binaryninja import Function, BinaryView, RegisterValueType, LowLevelILOperation from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress @@ -23,13 +23,27 @@ def extract_function_calls_to(fh: FunctionHandle): # Everything that is a code reference to the current function is considered a caller, which actually includes # many other references that are NOT a caller. For example, an instruction `push function_start` will also be # considered a caller to the function - if caller.llil is not None and caller.llil.operation in [ + llil = caller.llil + if (llil is None) or llil.operation not in [ LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST, LowLevelILOperation.LLIL_JUMP, LowLevelILOperation.LLIL_TAILCALL, ]: - yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) + continue + + if llil.dest.value.type not in [ + RegisterValueType.ImportedAddressValue, + RegisterValueType.ConstantValue, + RegisterValueType.ConstantPointerValue, + ]: + continue + + address = llil.dest.value.value + if address != func.start: + continue + + yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) def extract_function_loop(fh: FunctionHandle): From 55af6f052f6ab9ffc73e4f67a837c2f0d68f077c Mon Sep 17 00:00:00 2001 From: Xusheng Date: Thu, 21 Sep 2023 17:24:42 +0800 Subject: [PATCH 7/9] binja: add support for symtab names. Fix #1504 --- CHANGELOG.md | 1 + capa/features/extractors/binja/file.py | 20 +++++----- capa/features/extractors/binja/function.py | 26 ++++++++++++- capa/features/extractors/binja/insn.py | 44 ++++++++++++---------- tests/test_binja_features.py | 2 +- 5 files changed, 61 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc0d9a50e..020ad3436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff +- binja: add support for symtab names #1504 @xusheng6 ### Breaking Changes diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d46451e77..034b1636a 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -125,15 +125,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre """ for sym_name in bv.symbols: for sym in bv.symbols[sym_name]: - if sym.type == SymbolType.LibraryFunctionSymbol: - name = sym.short_name - yield FunctionName(name), sym.address - if name.startswith("_"): - # some linkers may prefix linked routines with a `_` to avoid name collisions. - # extract features for both the mangled and un-mangled representations. - # e.g. `_fwrite` -> `fwrite` - # see: https://stackoverflow.com/a/2628384/87207 - yield FunctionName(name[1:]), sym.address + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index d2e67aa3a..520de0b3f 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,8 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Tuple, Iterator -from binaryninja import Function, BinaryView, RegisterValueType, LowLevelILOperation +from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation +from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops @@ -73,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_name(fh: FunctionHandle): + """extract function names (e.g., symtab names)""" + func: Function = fh.inner + bv: BinaryView = func.view + if bv is None: + return + + for sym in bv.get_symbols(func.start): + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address + + def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) +FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 3144fd15a..f2b8fefc2 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) candidate_addrs.append(stub_addr) for address in candidate_addrs: - sym = func.view.get_symbol_at(address) - if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]: - continue - - sym_name = sym.short_name - - lib_name = "" - import_lib = bv.lookup_imported_object_library(sym.address) - if import_lib is not None: - lib_name = import_lib[0].name - if lib_name.endswith(".dll"): - lib_name = lib_name[:-4] - elif lib_name.endswith(".so"): - lib_name = lib_name[:-3] - - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): - yield API(name), ih.address - - if sym_name.startswith("_"): - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + for sym in func.view.get_symbols(address): + if sym is None or sym.type not in [ + SymbolType.ImportAddressSymbol, + SymbolType.ImportedFunctionSymbol, + SymbolType.FunctionSymbol, + ]: + continue + + sym_name = sym.short_name + + lib_name = "" + import_lib = bv.lookup_imported_object_library(sym.address) + if import_lib is not None: + lib_name = import_lib[0].name + if lib_name.endswith(".dll"): + lib_name = lib_name[:-4] + elif lib_name.endswith(".so"): + lib_name = lib_name[:-3] + + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): yield API(name), ih.address + if sym_name.startswith("_"): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + yield API(name), ih.address + def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index fdb7ff88b..3d51886d4 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -36,7 +36,7 @@ @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): From 79cef0e78365d71004a4bc0e692ce146af9ce793 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 22 Sep 2023 10:33:01 +0000 Subject: [PATCH 8/9] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 9fa44d4eb..87bd888e1 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 9fa44d4ebcfac59b272511deedeb7f0ad104108c +Subproject commit 87bd888e1984a1e9f9ab8e63b8707794392f3156 From b0af78569cc27c1631ab319459071bf1825bf5c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:16:40 +0000 Subject: [PATCH 9/9] build(deps-dev): bump ruff from 0.0.290 to 0.0.291 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.0.290 to 0.0.291. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.290...v0.0.291) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3fb6be347..fd89a03e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.290", + "ruff==0.0.291", "black==23.7.0", "isort==5.11.4", "mypy==1.5.1",