-
Notifications
You must be signed in to change notification settings - Fork 564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dotnet: add support for basic blocks #1326
Changes from 4 commits
969403a
c3778cf
e99525a
23d076e
30272d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: [package root]/LICENSE.txt | ||
# Unless required by applicable law or agreed to in writing, software distributed under the License | ||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and limitations under the License. | ||
|
||
from typing import Tuple, Iterator | ||
|
||
from dncil.cil.instruction import Instruction | ||
|
||
from capa.features.common import Feature, Characteristic | ||
from capa.features.address import Address | ||
from capa.features.basicblock import BasicBlock | ||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle | ||
|
||
|
||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    extract stackstring indicators from a basic block

    not implemented yet: the body contains no `yield`, so calling this
    raises NotImplementedError immediately instead of returning an iterator.
    """
    raise NotImplementedError()
|
||
|
||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    yield the "tight loop" characteristic for a basic block that branches back to itself

    a block qualifies when its final instruction is a branch (unconditional,
    conditional, or leave) whose operand equals the offset of the block's
    first instruction. the feature is reported at the basic block's address.
    """
    head: Instruction = bbh.inner.instructions[0]
    tail: Instruction = bbh.inner.instructions[-1]

    # all three predicates are evaluated up front, as a tuple, before any() inspects them
    ends_in_branch = any((tail.is_br(), tail.is_cond_br(), tail.is_leave()))

    if ends_in_branch and tail.operand == head.offset:
        yield Characteristic("tight loop"), bbh.address
|
||
|
||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    extract all features for a single basic block

    runs every handler listed in BASIC_BLOCK_HANDLERS, in order, and yields
    each (feature, address) pair it produces; afterwards yields the
    BasicBlock feature at the block's own address.
    """
    for handler in BASIC_BLOCK_HANDLERS:
        yield from handler(fh, bbh)

    yield BasicBlock(), bbh.address
|
||
|
||
# handlers invoked, in order, by extract_features() for every basic block.
# each handler takes (fh, bbh) and yields (feature, address) pairs.
# extract_bb_stackstring stays commented out; it currently raises
# NotImplementedError, so presumably it is disabled until implemented.
BASIC_BLOCK_HANDLERS = (
    extract_bb_tight_loop,
    # extract_bb_stackstring,
)
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -8,19 +8,21 @@ | |||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
from __future__ import annotations | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
from typing import Dict, List, Tuple, Union, Iterator, Optional | ||||||||||||||||||||||||||||
from typing import Set, Dict, List, Tuple, Union, Iterator, Optional | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
import dnfile | ||||||||||||||||||||||||||||
from dncil.cil.opcode import OpCodes | ||||||||||||||||||||||||||||
from dncil.cil.instruction import Instruction | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
import capa.features.extractors | ||||||||||||||||||||||||||||
import capa.features.extractors.dotnetfile | ||||||||||||||||||||||||||||
import capa.features.extractors.dnfile.file | ||||||||||||||||||||||||||||
import capa.features.extractors.dnfile.insn | ||||||||||||||||||||||||||||
import capa.features.extractors.dnfile.function | ||||||||||||||||||||||||||||
import capa.features.extractors.dnfile.basicblock | ||||||||||||||||||||||||||||
from capa.features.common import Feature | ||||||||||||||||||||||||||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress | ||||||||||||||||||||||||||||
from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod | ||||||||||||||||||||||||||||
from capa.features.extractors.dnfile.types import DnType, DnBasicBlock, DnUnmanagedMethod | ||||||||||||||||||||||||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor | ||||||||||||||||||||||||||||
from capa.features.extractors.dnfile.helpers import ( | ||||||||||||||||||||||||||||
get_dotnet_types, | ||||||||||||||||||||||||||||
|
@@ -98,7 +100,13 @@ def get_functions(self) -> Iterator[FunctionHandle]: | |||||||||||||||||||||||||||
fh: FunctionHandle = FunctionHandle( | ||||||||||||||||||||||||||||
address=DNTokenAddress(token), | ||||||||||||||||||||||||||||
inner=method, | ||||||||||||||||||||||||||||
ctx={"pe": self.pe, "calls_from": set(), "calls_to": set(), "cache": self.token_cache}, | ||||||||||||||||||||||||||||
ctx={ | ||||||||||||||||||||||||||||
"pe": self.pe, | ||||||||||||||||||||||||||||
"calls_from": set(), | ||||||||||||||||||||||||||||
"calls_to": set(), | ||||||||||||||||||||||||||||
"blocks": list(), | ||||||||||||||||||||||||||||
"cache": self.token_cache, | ||||||||||||||||||||||||||||
}, | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# method tokens should be unique | ||||||||||||||||||||||||||||
|
@@ -127,26 +135,100 @@ def get_functions(self) -> Iterator[FunctionHandle]: | |||||||||||||||||||||||||||
# those calls to other MethodDef methods e.g. calls to imported MemberRef methods | ||||||||||||||||||||||||||||
fh.ctx["calls_from"].add(address) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# calculate basic blocks | ||||||||||||||||||||||||||||
for fh in methods.values(): | ||||||||||||||||||||||||||||
# calculate basic block leaders where, | ||||||||||||||||||||||||||||
# 1. The first instruction of the intermediate code is a leader | ||||||||||||||||||||||||||||
# 2. Instructions that are targets of unconditional or conditional jump/goto statements are leaders | ||||||||||||||||||||||||||||
# 3. Instructions that immediately follow unconditional or conditional jump/goto statements are considered leaders | ||||||||||||||||||||||||||||
# https://www.geeksforgeeks.org/basic-blocks-in-compiler-design/ | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
leaders: Set[int] = set() | ||||||||||||||||||||||||||||
for idx, insn in enumerate(fh.inner.instructions): | ||||||||||||||||||||||||||||
if idx == 0: | ||||||||||||||||||||||||||||
# add #1 | ||||||||||||||||||||||||||||
leaders.add(insn.offset) | ||||||||||||||||||||||||||||
Comment on lines
+148
to
+150
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could pull this out of the loop so it's not re-checked on each iteration.
Comment on lines
+147
to
+150
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. assuming there's at least one instruction for each function |
||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
if any((insn.is_br(), insn.is_cond_br(), insn.is_leave())): | ||||||||||||||||||||||||||||
# add #2 | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
leaders.add(insn.operand) | ||||||||||||||||||||||||||||
# add #3 | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
try: | ||||||||||||||||||||||||||||
leaders.add(fh.inner.instructions[idx + 1].offset) | ||||||||||||||||||||||||||||
except IndexError: | ||||||||||||||||||||||||||||
# may encounter branch at end of method | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# build basic blocks using leaders | ||||||||||||||||||||||||||||
bb_curr: Optional[DnBasicBlock] = None | ||||||||||||||||||||||||||||
for idx, insn in enumerate(fh.inner.instructions): | ||||||||||||||||||||||||||||
if insn.offset in leaders: | ||||||||||||||||||||||||||||
# new leader, new basic block | ||||||||||||||||||||||||||||
bb_curr = DnBasicBlock(instructions=[insn]) | ||||||||||||||||||||||||||||
fh.ctx["blocks"].append(bb_curr) | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
assert bb_curr is not None | ||||||||||||||||||||||||||||
bb_curr.instructions.append(insn) | ||||||||||||||||||||||||||||
mr-tz marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
+168
to
+172
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
optional style to clarify one of two cases happens here |
||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# create mapping of first instruction to basic block | ||||||||||||||||||||||||||||
bb_map: Dict[int, DnBasicBlock] = {} | ||||||||||||||||||||||||||||
for bb in fh.ctx["blocks"]: | ||||||||||||||||||||||||||||
if len(bb.instructions) == 0: | ||||||||||||||||||||||||||||
# TODO: consider error? | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
Comment on lines
+177
to
+179
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I don't see how this can ever not be the case, given that the bb is initialized with at least one element above. So, we could keep this assertion around, or remove it. |
||||||||||||||||||||||||||||
bb_map[bb.instructions[0].offset] = bb | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# connect basic blocks | ||||||||||||||||||||||||||||
for idx, bb in enumerate(fh.ctx["blocks"]): | ||||||||||||||||||||||||||||
if len(bb.instructions) == 0: | ||||||||||||||||||||||||||||
# TODO: consider error? | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
Comment on lines
+184
to
+186
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
ditto |
||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
last = bb.instructions[-1] | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# connect branches to other basic blocks | ||||||||||||||||||||||||||||
if any((last.is_br(), last.is_cond_br(), last.is_leave())): | ||||||||||||||||||||||||||||
bb_branch: Optional[DnBasicBlock] = bb_map.get(last.operand, None) | ||||||||||||||||||||||||||||
if bb_branch is not None: | ||||||||||||||||||||||||||||
# TODO: consider None error? | ||||||||||||||||||||||||||||
bb.succs.append(bb_branch) | ||||||||||||||||||||||||||||
bb_branch.preds.append(bb) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
if any((last.is_br(), last.is_leave())): | ||||||||||||||||||||||||||||
# no fallthrough | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
# connect fallthrough | ||||||||||||||||||||||||||||
try: | ||||||||||||||||||||||||||||
bb_next: DnBasicBlock = fh.ctx["blocks"][idx + 1] | ||||||||||||||||||||||||||||
bb.succs.append(bb_next) | ||||||||||||||||||||||||||||
Comment on lines
+204
to
+205
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this always hold just based on the next index? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is my thought process (please check my sanity 🙃):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense and should go in there as a comment. What about BBs that do not return (IDA calls them Another corner case from IDA at the
So we should handle There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree the above should be in a comment, just for clarity. I'm not sure whether or not .NET supports non-returning functions. It's possible that the runtime still requires the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Other assumptions we should probably document (and please verify):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and, right now i see that we're not doing anything with exceptions. i'm not sure how we'd want to represent this in the CFG. there would be a couple things to consider:
we may want to figure out how we need to operate on exception handlers before finalizing the decisions here. so maybe this is not a blocker, but something we add once we know we need it. |
||||||||||||||||||||||||||||
bb_next.preds.append(bb) | ||||||||||||||||||||||||||||
except IndexError: | ||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||
Comment on lines
+203
to
+208
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
make region that can throw and handle the exception as small as possible for clarity There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is the |
||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
yield from methods.values() | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]: | ||||||||||||||||||||||||||||
yield from capa.features.extractors.dnfile.function.extract_features(fh) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
def get_basic_blocks(self, f) -> Iterator[BBHandle]: | ||||||||||||||||||||||||||||
# each dotnet method is considered 1 basic block | ||||||||||||||||||||||||||||
yield BBHandle( | ||||||||||||||||||||||||||||
address=f.address, | ||||||||||||||||||||||||||||
inner=f.inner, | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
def get_basic_blocks(self, fh) -> Iterator[BBHandle]:
    """
    yield a BBHandle for each basic block stored in fh.ctx["blocks"]

    each handle wraps the block as `inner`; its address pairs the function
    address with the offset of the block's first instruction, taken relative
    to fh.inner.offset + fh.inner.header_size (presumably the start of the
    method body; confirm against the method body reader).
    """
    for block in fh.ctx["blocks"]:
        body_start = fh.inner.offset + fh.inner.header_size
        leader_offset = block.instructions[0].offset

        yield BBHandle(
            address=DNTokenOffsetAddress(fh.address, leader_offset - body_start),
            inner=block,
        )
|
||||||||||||||||||||||||||||
def extract_basic_block_features(self, fh, bbh): | ||||||||||||||||||||||||||||
# we don't support basic block features | ||||||||||||||||||||||||||||
mike-hunhoff marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||
yield from [] | ||||||||||||||||||||||||||||
yield from capa.features.extractors.dnfile.basicblock.extract_features(fh, bbh) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
def get_instructions(self, fh, bbh): | ||||||||||||||||||||||||||||
for insn in bbh.inner.instructions: | ||||||||||||||||||||||||||||
yield InsnHandle( | ||||||||||||||||||||||||||||
address=DNTokenOffsetAddress(bbh.address, insn.offset - (fh.inner.offset + fh.inner.header_size)), | ||||||||||||||||||||||||||||
address=DNTokenOffsetAddress(fh.address, insn.offset - (fh.inner.offset + fh.inner.header_size)), | ||||||||||||||||||||||||||||
inner=insn, | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you convince me of this? I think the instruction must be the target of a branch in order to be a leader. Otherwise, it's unreferenced code.