Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
4k4xs4pH1r3 authored Sep 25, 2023
2 parents 0ebc019 + 782a5b3 commit 91c0425
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 42 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New Features
- ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan
- ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff
- binja: add support for symtab names #1504 @xusheng6

### Breaking Changes

Expand All @@ -15,6 +16,9 @@

### Bug Fixes
- ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff
- binja: improve function call site detection @xusheng6
- binja: use binaryninja.load to open files @xusheng6
- binja: bump binja version to 3.5 #1789 @xusheng6

### capa explorer IDA Pro plugin

Expand Down
20 changes: 11 additions & 9 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
"""
for sym_name in bv.symbols:
for sym in bv.symbols[sym_name]:
if sym.type == SymbolType.LibraryFunctionSymbol:
name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
Expand Down
44 changes: 40 additions & 4 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License.
from typing import Tuple, Iterator

from binaryninja import Function, BinaryView, LowLevelILOperation
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
Expand All @@ -23,13 +24,27 @@ def extract_function_calls_to(fh: FunctionHandle):
# Everything that is a code reference to the current function is considered a caller, which actually includes
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
# considered a caller to the function
if caller.llil is not None and caller.llil.operation in [
llil = caller.llil
if (llil is None) or llil.operation not in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
LowLevelILOperation.LLIL_JUMP,
LowLevelILOperation.LLIL_TAILCALL,
]:
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
continue

if llil.dest.value.type not in [
RegisterValueType.ImportedAddressValue,
RegisterValueType.ConstantValue,
RegisterValueType.ConstantPointerValue,
]:
continue

address = llil.dest.value.value
if address != func.start:
continue

yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)


def extract_function_loop(fh: FunctionHandle):
Expand Down Expand Up @@ -59,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address


def extract_function_name(fh: FunctionHandle):
"""extract function names (e.g., symtab names)"""
func: Function = fh.inner
bv: BinaryView = func.view
if bv is None:
return

for sym in bv.get_symbols(func.start):
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr


FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
44 changes: 24 additions & 20 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
candidate_addrs.append(stub_addr)

for address in candidate_addrs:
sym = func.view.get_symbol_at(address)
if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
for sym in func.view.get_symbols(address):
if sym is None or sym.type not in [
SymbolType.ImportAddressSymbol,
SymbolType.ImportedFunctionSymbol,
SymbolType.FunctionSymbol,
]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
yield API(name), ih.address


def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
Expand Down
5 changes: 3 additions & 2 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ def get_extractor(
sys.path.append(str(bn_api))

try:
from binaryninja import BinaryView, BinaryViewType
import binaryninja
from binaryninja import BinaryView
except ImportError:
raise RuntimeError(
"Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
Expand All @@ -568,7 +569,7 @@ def get_extractor(
import capa.features.extractors.binja.extractor

with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
bv: BinaryView = BinaryViewType.get_view_of_file(str(path))
bv: BinaryView = binaryninja.load(str(path))
if bv is None:
raise RuntimeError(f"Binary Ninja cannot open file {path}")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ dev = [
"flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.0.286",
"ruff==0.0.291",
"black==23.7.0",
"isort==5.12.0",
"mypy==1.5.1",
Expand Down
2 changes: 1 addition & 1 deletion rules
Submodule rules updated 1 files
+2 −0 lib/allocate-memory.yml
5 changes: 3 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path):

@lru_cache(maxsize=1)
def get_binja_extractor(path: Path):
from binaryninja import Settings, BinaryViewType
import binaryninja
from binaryninja import Settings

import capa.features.extractors.binja.extractor

Expand All @@ -168,7 +169,7 @@ def get_binja_extractor(path: Path):
if path.name.endswith("kernel32-64.dll_"):
old_pdb = settings.get_bool("pdb.loadGlobalSymbols")
settings.set_bool("pdb.loadGlobalSymbols", False)
bv = BinaryViewType.get_view_of_file(str(path))
bv = binaryninja.load(str(path))
if path.name.endswith("kernel32-64.dll_"):
settings.set_bool("pdb.loadGlobalSymbols", old_pdb)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_binja_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS,
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_binja_features(sample, scope, feature, expected):
Expand Down Expand Up @@ -69,4 +69,4 @@ def test_standalone_binja_backend():
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
def test_binja_version():
version = binaryninja.core_version_info()
assert version.major == 3 and version.minor == 4
assert version.major == 3 and version.minor == 5

0 comments on commit 91c0425

Please sign in to comment.