Skip to content

Commit

Permalink
refactor: rename functions and add docs to clarify parser.py
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed May 30, 2024
1 parent 773f8da commit 6ae9363
Showing 1 changed file with 115 additions and 81 deletions.
196 changes: 115 additions & 81 deletions coverage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,23 @@ class ArcStart:
"Line 17 didn't run because {cause}." The fragment can include "{lineno}"
to have `lineno` interpolated into it.
As an example, this code::
if something(x): # line 1
func(x) # line 2
more_stuff() # line 3
would have two ArcStarts:
- ArcStart(1, "the condition on line 1 was always true")
- ArcStart(1, "the condition on line 1 was never true")
The first would be used to create an arc from 1 to 3, creating a message like
"line 1 didn't jump to line 3 because the condition on line 1 was always true."
The second would be used for the arc from 1 to 2, creating a message like
"line 1 didn't jump to line 2 because the condition on line 1 was never true."
"""
lineno: TLineNo
cause: str = ""
Expand Down Expand Up @@ -655,7 +672,6 @@ def __init__(self, body: Sequence[ast.AST]) -> None:
self.body = body
self.lineno = body[0].lineno

# TODO: some add_arcs methods here don't add arcs, they return them. Rename them.
# TODO: Shouldn't the cause messages join with "and" instead of "or"?

def _make_expression_code_method(noun: str) -> Callable[[AstArcAnalyzer, ast.AST], None]:
Expand All @@ -668,7 +684,21 @@ def _code_object__expression_callable(self: AstArcAnalyzer, node: ast.AST) -> No


class AstArcAnalyzer:
"""Analyze source text with an AST to find executable code paths."""
"""Analyze source text with an AST to find executable code paths.
The .analyze() method does the work, and populates these attributes:
`arcs`: a set of (from, to) pairs of the arcs possible in the code.
`missing_arc_fragments`: a dict mapping (from, to) arcs to lists of
message fragments explaining why the arc is missing from execution::
{ (start, end): [(startmsg, endmsg), ...], }
For an arc starting from line 17, they should be usable to form complete
sentences like: "Line 17 {endmsg} because {startmsg}".
"""

def __init__(
self,
Expand All @@ -695,31 +725,63 @@ def __init__(
print(ast.dump(self.root_node, include_attributes=True, **dumpkw))

self.arcs: set[TArc] = set()

# A map from arc pairs to a list of pairs of sentence fragments:
# { (start, end): [(startmsg, endmsg), ...], }
#
# For an arc from line 17, they should be usable like:
# "Line 17 {endmsg}, because {startmsg}"
self.missing_arc_fragments: TArcFragments = collections.defaultdict(list)
self.block_stack: list[Block] = []

# $set_env.py: COVERAGE_TRACK_ARCS - Trace possible arcs added while parsing code.
self.debug = bool(int(os.getenv("COVERAGE_TRACK_ARCS", "0")))

def analyze(self) -> None:
"""Examine the AST tree from `root_node` to determine possible arcs.
This sets the `arcs` attribute to be a set of (from, to) line number
pairs.
"""
"""Examine the AST tree from `self.root_node` to determine possible arcs."""
for node in ast.walk(self.root_node):
node_name = node.__class__.__name__
code_object_handler = getattr(self, "_code_object__" + node_name, None)
if code_object_handler is not None:
code_object_handler(node)

# Code object dispatchers: _code_object__*
#
# These methods are used by analyze() as the start of the analysis.
# There is one for each construct with a code object.

def _code_object__Module(self, node: ast.Module) -> None:
"""Record the arcs into, through, and out of a module.

The negated first line, `-start`, is used as the line number on the
outside of the module: analysis of the body begins from
`ArcStart(-start)`, and every exit from the body gets an arc back to
`-start`.
"""
start = self.line_for_node(node)
if node.body:
# body_exits records the arcs within the body and returns the ArcStarts
# that leave it; each of those becomes an arc out of the module.
exits = self.body_exits(node.body, from_start=ArcStart(-start))
for xit in exits:
self.add_arc(xit.lineno, -start, xit.cause, "didn't exit the module")
else:
# Empty module: there is no body to analyze, so record only the arc in
# (-start to start) and the arc out (start to -start).
self.add_arc(-start, start)
self.add_arc(start, -start)

def _code_object__FunctionDef(self, node: ast.FunctionDef) -> None:
"""Record the arcs for the body of a function definition.

A `FunctionBlock` is kept on `self.block_stack` while the body is
analyzed. The exits from the body are not turned into arcs here; they
are handed to `process_return_exits`.
"""
start = self.line_for_node(node)
self.block_stack.append(FunctionBlock(start=start, name=node.name))
# The body is entered from -start, the negated first line of the function.
exits = self.body_exits(node.body, from_start=ArcStart(-start))
self.process_return_exits(exits)
# Undo the push made above now that the body has been analyzed.
self.block_stack.pop()

_code_object__AsyncFunctionDef = _code_object__FunctionDef

def _code_object__ClassDef(self, node: ast.ClassDef) -> None:
"""Record the arcs for the body of a class definition.

Adds the entry arc from `-start` to `start`, analyzes the body starting
from `start`, and adds an arc from each exit of the body back to
`-start`.
"""
start = self.line_for_node(node)
# The entry arc is added explicitly here, and the body analysis then
# starts from `start` itself rather than from `-start`.
self.add_arc(-start, start)
exits = self.body_exits(node.body, from_start=ArcStart(start))
for xit in exits:
# Each exit from the class body leaves the class; the class name is
# included in the message fragment passed along with the arc.
self.add_arc(
xit.lineno, -start, xit.cause,
f"didn't exit the body of class {node.name!r}",
)

_code_object__Lambda = _make_expression_code_method("lambda")
_code_object__GeneratorExp = _make_expression_code_method("generator expression")
if env.PYBEHAVIOR.comprehensions_are_functions:
_code_object__DictComp = _make_expression_code_method("dictionary comprehension")
_code_object__SetComp = _make_expression_code_method("set comprehension")
_code_object__ListComp = _make_expression_code_method("list comprehension")


def add_arc(
self,
start: TLineNo,
Expand Down Expand Up @@ -756,6 +818,11 @@ def line_for_node(self, node: ast.AST) -> TLineNo:
else:
return node.lineno

# First lines: _line__*
#
# Dispatched by line_for_node, each method knows how to identify the first
# line number in the node, as Python will report it.

def _line_decorated(self, node: ast.FunctionDef) -> TLineNo:
"""Compute first line number for things that can be decorated (classes and functions)."""
if node.decorator_list:
Expand Down Expand Up @@ -804,8 +871,8 @@ def _line__Module(self, node: ast.Module) -> TLineNo:
"Import", "ImportFrom", "Nonlocal", "Pass",
}

def add_arcs(self, node: ast.AST) -> set[ArcStart]:
"""Add the arcs for `node`.
def node_exits(self, node: ast.AST) -> set[ArcStart]:
"""Find the set of arc starts that exit this node.
Return a set of ArcStarts, exits from this node to the next. Because a
node represents an entire sub-tree (including its children), the exits
Expand All @@ -817,7 +884,8 @@ def add_arcs(self, node: ast.AST) -> set[ArcStart]:
else:
doit(5)
There are two exits from line 1: they start at line 3 and line 5.
There are three exits from line 1: they start at lines 1, 3 and 5.
There are two exits from line 2: lines 3 and 5.
"""
node_name = node.__class__.__name__
Expand All @@ -826,7 +894,7 @@ def add_arcs(self, node: ast.AST) -> set[ArcStart]:
getattr(self, "_handle__" + node_name, None),
)
if handler is not None:
return handler(node)
arc_starts = handler(node)
else:
# No handler: either it's something that's ok to default (a simple
# statement), or it's something we overlooked.
Expand All @@ -835,27 +903,35 @@ def add_arcs(self, node: ast.AST) -> set[ArcStart]:
raise RuntimeError(f"*** Unhandled: {node}") # pragma: only failure

# Default for simple statements: one exit from this node.
return {ArcStart(self.line_for_node(node))}
arc_starts = {ArcStart(self.line_for_node(node))}
return arc_starts

def add_body_arcs(
def body_exits(
self,
body: Sequence[ast.AST],
from_start: ArcStart | None = None,
prev_starts: set[ArcStart] | None = None,
) -> set[ArcStart]:
"""Add arcs for the body of a compound statement.
"""Find arc starts that exit the body of a compound statement.
`body` is the body node. `from_start` is a single `ArcStart` that can
be the previous line in flow before this body. `prev_starts` is a set
of ArcStarts that can be the previous line. Only one of them should be
given.
Also records arcs (using `add_arc`) within the body.
Returns a set of ArcStarts, the exits from this body.
"""
if prev_starts is None:
assert from_start is not None
prev_starts = {from_start}
else:
assert from_start is None

# Loop over the nodes in the body, making arcs from each one's exits to
# the next node.
for body_node in body:
lineno = self.line_for_node(body_node)
first_line = self.multiline.get(lineno, lineno)
Expand All @@ -867,7 +943,7 @@ def add_body_arcs(
lineno = self.line_for_node(body_node)
for prev_start in prev_starts:
self.add_arc(prev_start.lineno, lineno, prev_start.cause)
prev_starts = self.add_arcs(body_node)
prev_starts = self.node_exits(body_node)
return prev_starts

def find_non_missing_node(self, node: ast.AST) -> ast.AST | None:
Expand All @@ -879,7 +955,7 @@ def find_non_missing_node(self, node: ast.AST) -> ast.AST | None:
Returns a node, or None if none of the node remains.
"""
# This repeats work just done in add_body_arcs, but this duplication
# This repeats work just done in body_exits, but this duplication
# means we can avoid a function call in the 99.9999% case of not
# optimizing away statements.
lineno = self.line_for_node(node)
Expand Down Expand Up @@ -995,13 +1071,13 @@ def process_return_exits(self, exits: set[ArcStart]) -> None:
if block.process_return_exits(exits, self.add_arc):
break

# Handlers: _handle__*
# Node handlers: _handle__*
#
# Each handler deals with a specific AST node type, dispatched from
# add_arcs. Handlers return the set of exits from that node, and can
# node_exits. Handlers return the set of exits from that node, and can
# also call self.add_arc to record arcs they find. These functions mirror
# the Python semantics of each syntactic construct. See the docstring
# for add_arcs to understand the concept of exits from a node.
# for node_exits to understand the concept of exits from a node.
#
# Every node type that represents a statement should have a handler, or it
# should be listed in OK_TO_DEFAULT.
Expand Down Expand Up @@ -1055,7 +1131,7 @@ def _handle__For(self, node: ast.For) -> set[ArcStart]:
start = self.line_for_node(node.iter)
self.block_stack.append(LoopBlock(start=start))
from_start = ArcStart(start, cause="the loop on line {lineno} never started")
exits = self.add_body_arcs(node.body, from_start=from_start)
exits = self.body_exits(node.body, from_start=from_start)
# Any exit from the body will go back to the top of the loop.
for xit in exits:
self.add_arc(xit.lineno, start, xit.cause)
Expand All @@ -1064,7 +1140,7 @@ def _handle__For(self, node: ast.For) -> set[ArcStart]:
exits = my_block.break_exits
from_start = ArcStart(start, cause="the loop on line {lineno} didn't complete")
if node.orelse:
else_exits = self.add_body_arcs(node.orelse, from_start=from_start)
else_exits = self.body_exits(node.orelse, from_start=from_start)
exits |= else_exits
else:
# No else clause: exit from the for line.
Expand All @@ -1079,9 +1155,9 @@ def _handle__For(self, node: ast.For) -> set[ArcStart]:
def _handle__If(self, node: ast.If) -> set[ArcStart]:
start = self.line_for_node(node.test)
from_start = ArcStart(start, cause="the condition on line {lineno} was never true")
exits = self.add_body_arcs(node.body, from_start=from_start)
exits = self.body_exits(node.body, from_start=from_start)
from_start = ArcStart(start, cause="the condition on line {lineno} was always true")
exits |= self.add_body_arcs(node.orelse, from_start=from_start)
exits |= self.body_exits(node.orelse, from_start=from_start)
return exits

if sys.version_info >= (3, 10):
Expand All @@ -1102,7 +1178,7 @@ def _handle__Match(self, node: ast.Match) -> set[ArcStart]:
case_start,
cause="the pattern on line {lineno} never matched",
)
exits |= self.add_body_arcs(case.body, from_start=from_start)
exits |= self.body_exits(case.body, from_start=from_start)
last_start = case_start
if not had_wildcard:
exits.add(
Expand All @@ -1112,7 +1188,7 @@ def _handle__Match(self, node: ast.Match) -> set[ArcStart]:

def _handle__NodeList(self, node: NodeList) -> set[ArcStart]:
start = self.line_for_node(node)
exits = self.add_body_arcs(node.body, from_start=ArcStart(start))
exits = self.body_exits(node.body, from_start=ArcStart(start))
return exits

def _handle__Raise(self, node: ast.Raise) -> set[ArcStart]:
Expand Down Expand Up @@ -1147,7 +1223,7 @@ def _handle__Try(self, node: ast.Try) -> set[ArcStart]:
self.block_stack.append(try_block)

start = self.line_for_node(node)
exits = self.add_body_arcs(node.body, from_start=ArcStart(start))
exits = self.body_exits(node.body, from_start=ArcStart(start))

# We're done with the `try` body, so this block no longer handles
# exceptions. We keep the block so the `finally` clause can pick up
Expand All @@ -1173,10 +1249,10 @@ def _handle__Try(self, node: ast.Try) -> set[ArcStart]:
last_handler_start = handler_start
from_cause = "the exception caught by line {lineno} didn't happen"
from_start = ArcStart(handler_start, cause=from_cause)
handler_exits |= self.add_body_arcs(handler_node.body, from_start=from_start)
handler_exits |= self.body_exits(handler_node.body, from_start=from_start)

if node.orelse:
exits = self.add_body_arcs(node.orelse, prev_starts=exits)
exits = self.body_exits(node.orelse, prev_starts=exits)

exits |= handler_exits

Expand All @@ -1190,7 +1266,7 @@ def _handle__Try(self, node: ast.Try) -> set[ArcStart]:
try_block.return_from # or a `return`.
)

final_exits = self.add_body_arcs(node.finalbody, prev_starts=final_from)
final_exits = self.body_exits(node.finalbody, prev_starts=final_from)

if try_block.break_from:
if env.PYBEHAVIOR.finally_jumps_back:
Expand Down Expand Up @@ -1267,7 +1343,7 @@ def _handle__While(self, node: ast.While) -> set[ArcStart]:
to_top = self.line_for_node(node.body[0])
self.block_stack.append(LoopBlock(start=to_top))
from_start = ArcStart(start, cause="the condition on line {lineno} was never true")
exits = self.add_body_arcs(node.body, from_start=from_start)
exits = self.body_exits(node.body, from_start=from_start)
for xit in exits:
self.add_arc(xit.lineno, to_top, xit.cause)
exits = set()
Expand All @@ -1276,7 +1352,7 @@ def _handle__While(self, node: ast.While) -> set[ArcStart]:
exits.update(my_block.break_exits)
from_start = ArcStart(start, cause="the condition on line {lineno} was always true")
if node.orelse:
else_exits = self.add_body_arcs(node.orelse, from_start=from_start)
else_exits = self.body_exits(node.orelse, from_start=from_start)
exits |= else_exits
else:
# No `else` clause: you can exit from the start.
Expand All @@ -1288,7 +1364,7 @@ def _handle__With(self, node: ast.With) -> set[ArcStart]:
start = self.line_for_node(node)
if env.PYBEHAVIOR.exit_through_with:
self.block_stack.append(WithBlock(start=start))
exits = self.add_body_arcs(node.body, from_start=ArcStart(start))
exits = self.body_exits(node.body, from_start=ArcStart(start))
if env.PYBEHAVIOR.exit_through_with:
with_block = self.block_stack.pop()
assert isinstance(with_block, WithBlock)
Expand All @@ -1312,45 +1388,3 @@ def _handle__With(self, node: ast.With) -> set[ArcStart]:
return exits

_handle__AsyncWith = _handle__With

# Code object dispatchers: _code_object__*
#
# These methods are used by analyze() as the start of the analysis.
# There is one for each construct with a code object.

def _code_object__Module(self, node: ast.Module) -> None:
"""Record the arcs into, through, and out of a module.

The negated first line, `-start`, is used as the line number on the
outside of the module: analysis of the body begins from
`ArcStart(-start)`, and every exit from the body gets an arc back to
`-start`.
"""
start = self.line_for_node(node)
if node.body:
# add_body_arcs returns the set of ArcStarts that leave the body; each
# of those becomes an arc out of the module.
exits = self.add_body_arcs(node.body, from_start=ArcStart(-start))
for xit in exits:
self.add_arc(xit.lineno, -start, xit.cause, "didn't exit the module")
else:
# Empty module: there is no body to analyze, so record only the arc in
# (-start to start) and the arc out (start to -start).
self.add_arc(-start, start)
self.add_arc(start, -start)

def _code_object__FunctionDef(self, node: ast.FunctionDef) -> None:
"""Record the arcs for the body of a function definition.

A `FunctionBlock` is kept on `self.block_stack` while the body is
analyzed. The exits from the body are not turned into arcs here; they
are handed to `process_return_exits`.
"""
start = self.line_for_node(node)
self.block_stack.append(FunctionBlock(start=start, name=node.name))
# The body is entered from -start, the negated first line of the function.
exits = self.add_body_arcs(node.body, from_start=ArcStart(-start))
self.process_return_exits(exits)
# Undo the push made above now that the body has been analyzed.
self.block_stack.pop()

_code_object__AsyncFunctionDef = _code_object__FunctionDef

def _code_object__ClassDef(self, node: ast.ClassDef) -> None:
"""Record the arcs for the body of a class definition.

Adds the entry arc from `-start` to `start`, analyzes the body starting
from `start`, and adds an arc from each exit of the body back to
`-start`.
"""
start = self.line_for_node(node)
# The entry arc is added explicitly here, and the body analysis then
# starts from `start` itself rather than from `-start`.
self.add_arc(-start, start)
exits = self.add_body_arcs(node.body, from_start=ArcStart(start))
for xit in exits:
# Each exit from the class body leaves the class; the class name is
# included in the message fragment passed along with the arc.
self.add_arc(
xit.lineno, -start, xit.cause,
f"didn't exit the body of class {node.name!r}",
)

_code_object__Lambda = _make_expression_code_method("lambda")
_code_object__GeneratorExp = _make_expression_code_method("generator expression")
if env.PYBEHAVIOR.comprehensions_are_functions:
_code_object__DictComp = _make_expression_code_method("dictionary comprehension")
_code_object__SetComp = _make_expression_code_method("set comprehension")
_code_object__ListComp = _make_expression_code_method("list comprehension")

0 comments on commit 6ae9363

Please sign in to comment.