Skip to content

Commit

Permalink
wip: branch/jump without disabling lines
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed Jan 6, 2024
1 parent c34e0b6 commit 3c98893
Showing 1 changed file with 74 additions and 49 deletions.
123 changes: 74 additions & 49 deletions coverage/sysmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@

from __future__ import annotations

import collections
import dataclasses
import dis
import functools
import inspect
import os
import os.path
import itertools
import sys
import threading
import traceback

from types import CodeType, FrameType
Expand All @@ -27,7 +26,7 @@
cast,
)

from coverage.debug import short_filename, short_stack
from coverage.debug import log, short_filename, short_stack
from coverage.types import (
AnyCallable,
TArc,
Expand Down Expand Up @@ -65,7 +64,11 @@ def __init__(self, wrapped: Any, namespace: str) -> None:

def __getattr__(self, name: str) -> Callable[..., Any]:
def _wrapped(*args: Any, **kwargs: Any) -> Any:
log(f"{self.namespace}.{name}{args}{kwargs}")
self_prefix = ""
caller_self = inspect.stack()[1][0].f_locals.get("self")
if caller_self is not None:
self_prefix = f"{id(caller_self):#x}: "
log(f"{self_prefix}{self.namespace}.{name}{args}{kwargs}")
return getattr(self.wrapped, name)(*args, **kwargs)

return _wrapped
Expand All @@ -76,28 +79,6 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any:
short_stack = functools.partial(
short_stack, full=True, short_filenames=True, frame_ids=True
)
seen_threads: Set[int] = set()

def log(msg: str) -> None:
    """Append a debugging message to our detailed debugging log file(s).

    Every line written is prefixed with the process id and a short slug
    derived from the process id and the current thread id.  The first time
    a thread logs anything, a "New thread" entry with the current short
    stack is logged before the message itself.
    """
    process_id = os.getpid()
    thread_id = cast(int, threading.current_thread().ident)
    # Fold pid and thread id into a shorter number that is more likely to be
    # unique (thread ids might be reused across processes).
    slug = f"{(process_id * thread_id) % 9_999_991:07d}"

    if thread_id not in seen_threads:
        # Remember the thread *before* recursing, so the announcement is
        # emitted exactly once per thread.
        seen_threads.add(thread_id)
        log(f"New thread {thread_id} {slug}:\n{short_stack()}")

    # Only one destination is active.  Per-sequence ($PANSEQ), per-process
    # and per-thread file names under /tmp/pan.NNN were sketched here
    # previously but are not enabled.
    destinations = ("/tmp/foo.out",)
    for destination in destinations:
        with open(destination, "a") as out:
            print(f"{process_id}:{slug}: {msg}", file=out, flush=True)

def arg_repr(arg: Any) -> str:
"""Make a customized repr for logged values."""
Expand Down Expand Up @@ -128,7 +109,9 @@ def _wrapped(self: Any, *args: Any) -> Any:
return ret
except Exception as exc:
log(f"!!{exc.__class__.__name__}: {exc}")
log("".join(traceback.format_exception(exc))) # pylint: disable=[no-value-for-parameter]
# fmt: off
log("".join(traceback.format_exception(exc))) # pylint: disable=[no-value-for-parameter]
# fmt: on
try:
assert sys_monitoring is not None
sys_monitoring.set_events(sys.monitoring.COVERAGE_ID, 0)
Expand All @@ -143,7 +126,9 @@ def _wrapped(self: Any, *args: Any) -> Any:

else:

def log(msg: str) -> None:
def log( # pylint: disable=function-redefined
msg: str, stack: bool = False
) -> None:
"""Write a message to our detailed debugging log(s), but not really."""

def panopticon(*names: Optional[str]) -> AnyCallable:
Expand All @@ -160,9 +145,9 @@ class CodeInfo:
"""The information we want about each code object."""

tracing: bool
file_data: Optional[TTraceFileData]
# TODO: what is byte_to_line for?
byte_to_line: Dict[int, int] | None
file_data: TTraceFileData
byte_to_line: Dict[int, int]
branch_dests: Dict[int, Set[int]]


def bytes_to_lines(code: CodeType) -> Dict[int, int]:
Expand All @@ -180,9 +165,14 @@ def bytes_to_lines(code: CodeType) -> Dict[int, int]:
class SysMonitor(TracerCore):
"""Python implementation of the raw data tracer for PEP669 implementations."""

serial = itertools.count()

# One of these will be used across threads. Be careful.

def __init__(self, tool_id: int) -> None:
self.number = next(self.serial)
log(f"SysMonitor() #{self.number}: {id(self):#x}")

# Attributes set from the collector:
self.data: TTraceData
self.trace_arcs = False
Expand Down Expand Up @@ -217,7 +207,10 @@ def __init__(self, tool_id: int) -> None:
def __repr__(self) -> str:
points = sum(len(v) for v in self.data.values())
files = len(self.data)
return f"<SysMonitor at {id(self):#x}: {points} data points in {files} files>"
return (
f"<SysMonitor #{self.number} at {id(self):#x}: "
+ f"{points} data points in {files} files>"
)

@panopticon()
def start(self) -> None:
Expand All @@ -227,17 +220,19 @@ def start(self) -> None:
assert sys_monitoring is not None
sys_monitoring.use_tool_id(self.myid, "coverage.py")
register = functools.partial(sys_monitoring.register_callback, self.myid)
events = sys_monitoring.events
events = sys.monitoring.events
if self.trace_arcs:
sys_monitoring.set_events(
self.myid,
events.PY_START | events.PY_UNWIND,
)
register(events.PY_START, self.sysmon_py_start)
register(events.BRANCH, self.sysmon_branch)
register(events.JUMP, self.sysmon_jump)
register(events.LINE, self.sysmon_line_arcs)
register(events.PY_RESUME, self.sysmon_py_resume_arcs)
register(events.PY_RETURN, self.sysmon_py_return_arcs)
register(events.PY_START, self.sysmon_py_start)
register(events.PY_UNWIND, self.sysmon_py_unwind_arcs)
register(events.LINE, self.sysmon_line_arcs)
else:
sys_monitoring.set_events(self.myid, events.PY_START)
register(events.PY_START, self.sysmon_py_start)
Expand All @@ -257,6 +252,11 @@ def stop(self) -> None:
for code in self.local_event_codes.values():
sys_monitoring.set_local_events(self.myid, code, 0)
self.local_event_codes = {}

# register = functools.partial(sys_monitoring.register_callback, self.myid)
#
# sys.monitoring.register_callback(tool_id, event, None).

sys_monitoring.free_tool_id(self.myid)
self.sysmon_on = False

Expand Down Expand Up @@ -326,31 +326,26 @@ def sysmon_py_start(self, code: CodeType, instruction_offset: int) -> MonitorRet
file_data = self.data[tracename]
b2l = bytes_to_lines(code)
else:
file_data = None
b2l = None
# These won't be used, but empties simplify the type checking.
file_data = set()
b2l = {}

self.code_infos[id(code)] = CodeInfo(
tracing=tracing_code,
file_data=file_data,
byte_to_line=b2l,
branch_dests=collections.defaultdict(set),
)
self.code_objects.append(code)

if tracing_code:
events = sys.monitoring.events
if self.sysmon_on:
assert sys_monitoring is not None
sys_monitoring.set_local_events(
self.myid,
code,
events.PY_RETURN
#
| events.PY_RESUME
# | events.PY_YIELD
| events.LINE,
# | events.BRANCH
# | events.JUMP
)
my_events = events.PY_RETURN | events.PY_RESUME | events.LINE
if self.trace_arcs:
my_events |= events.BRANCH | events.JUMP
sys_monitoring.set_local_events(self.myid, code, my_events)
self.local_event_codes[id(code)] = code

if tracing_code and self.trace_arcs:
Expand Down Expand Up @@ -423,3 +418,33 @@ def sysmon_line_arcs(self, code: CodeType, line_number: int) -> MonitorReturn:
# log(f"adding {arc=}")
self.last_lines[frame] = line_number
return ret

@panopticon("code", "src", "dst")
def sysmon_jump(
    self, code: CodeType, instruction_offset: int, destination_offset: int
) -> MonitorReturn:
    """Handle sys.monitoring.events.JUMP events.

    Maps the source and destination bytecode offsets to line numbers and,
    when they differ, adds the (source, destination) pair to the file data
    for this code object.  Always returns `sys.monitoring.DISABLE`.
    """
    info = self.code_infos[id(code)]
    line_of = info.byte_to_line
    from_line = line_of[instruction_offset]
    to_line = line_of[destination_offset]
    # A jump that stays on one line is not recorded.
    if from_line != to_line:
        arcs = cast(Set[TArc], info.file_data)
        arcs.add((from_line, to_line))
    return sys.monitoring.DISABLE

@panopticon("code", "src", "dst")
def sysmon_branch(
    self, code: CodeType, instruction_offset: int, destination_offset: int
) -> MonitorReturn:
    """Handle sys.monitoring.events.BRANCH events.

    Tracks which destination offsets have been seen for each branch
    instruction.  A destination seen for the first time is recorded as a
    (source line, destination line) pair when the two lines differ.
    Returns `sys.monitoring.DISABLE` once exactly two destinations have
    been seen for the instruction, and None otherwise.
    """
    info = self.code_infos[id(code)]
    seen = info.branch_dests[instruction_offset]

    if destination_offset not in seen:
        line_of = info.byte_to_line
        from_line = line_of[instruction_offset]
        to_line = line_of[destination_offset]
        if from_line != to_line:
            arcs = cast(Set[TArc], info.file_data)
            arcs.add((from_line, to_line))
        # Remember the destination even when it was on the same line, so
        # that it still counts toward the two-destination check below.
        seen.add(destination_offset)

    return sys.monitoring.DISABLE if len(seen) == 2 else None

0 comments on commit 3c98893

Please sign in to comment.