Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Move type checking code and make it bidirectional #58

Merged
merged 28 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
871b14c
Add type checking code
mark-koch Nov 22, 2023
b0f482e
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 24, 2023
0f1c428
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 24, 2023
0851fd6
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 24, 2023
0b4ee03
Make CustomCallChecker.check abstract again
mark-koch Nov 24, 2023
708ab55
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 27, 2023
6ea1df5
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 27, 2023
867e982
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 27, 2023
d09ce09
Merge remote-tracking branch 'origin/refactor/compilation-stages' int…
mark-koch Nov 27, 2023
05dc741
Merge branch 'refactor/compilation-stages' into stages/check
mark-koch Nov 27, 2023
b2ad478
Remove unused import
mark-koch Nov 27, 2023
61e2cba
Detect unused linear expressions
mark-koch Nov 28, 2023
302e2aa
Improve synthesize_call docstring
mark-koch Nov 30, 2023
145c6aa
Add module docstrings
mark-koch Nov 30, 2023
6b833cb
Rename cfg to containing_cfg
mark-koch Nov 30, 2023
29c6009
Improve check for entry BB
mark-koch Nov 30, 2023
f982901
Use booly value of list
mark-koch Dec 4, 2023
55d7271
Use contextlib suppress
mark-koch Dec 4, 2023
6e3c2f3
Fix typo
mark-koch Dec 4, 2023
dacc31f
Clarify comment
mark-koch Dec 4, 2023
a872eed
Clarify docstring
mark-koch Dec 4, 2023
43e25e3
Align op table
mark-koch Dec 4, 2023
4de95e8
Specify type: ignore
mark-koch Dec 4, 2023
34db487
Improve error message wording
mark-koch Dec 4, 2023
c522771
Use pattern matches
mark-koch Dec 4, 2023
f930ecc
Turn assert into exception
mark-koch Dec 4, 2023
f5e60cf
Specify type: ignore
mark-koch Dec 4, 2023
2f29f2f
Fix CI
mark-koch Dec 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion guppy/cfg/bb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class BB(ABC):
idx: int

# Pointer to the CFG that contains this node
cfg: "BaseCFG[Self]"
containing_cfg: "BaseCFG[Self]"

# AST statements contained in this BB
statements: list[BBStatement] = field(default_factory=list)
Expand Down
164 changes: 161 additions & 3 deletions guppy/checker/cfg_checker.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
"""Type checking code for control-flow graphs

Operates on CFGs produced by the `CFGBuilder`. Produces a `CheckedCFG` consisting of
`CheckedBB`s with inferred type signatures.
"""

import collections
from dataclasses import dataclass
from typing import Sequence

from guppy.ast_util import line_col
from guppy.cfg.bb import BB
from guppy.cfg.cfg import CFG, BaseCFG
from guppy.checker.core import Globals
from guppy.checker.core import Globals, Context

from guppy.checker.core import Variable
from guppy.checker.expr_checker import ExprSynthesizer, to_bool
from guppy.checker.stmt_checker import StmtChecker
from guppy.error import GuppyError
from guppy.gtypes import GuppyType


Expand Down Expand Up @@ -52,7 +63,53 @@ def check_cfg(
Annotates the basic blocks with input and output type signatures and removes
unreachable blocks.
"""
raise NotImplementedError
# First, we need to run program analysis
ass_before = set(v.name for v in inputs)
qartik marked this conversation as resolved.
Show resolved Hide resolved
cfg.analyze(ass_before, ass_before)

# We start by compiling the entry BB
checked_cfg = CheckedCFG([v.ty for v in inputs], return_ty)
checked_cfg.entry_bb = check_bb(
cfg.entry_bb, checked_cfg, inputs, return_ty, globals
)
compiled = {cfg.entry_bb: checked_cfg.entry_bb}

# Visit all control-flow edges in BFS order. We can't just do a normal loop over
# all BBs since the input types for a BB are computed by checking a predecessor.
# We do BFS instead of DFS to get a better error ordering.
queue = collections.deque(
(checked_cfg.entry_bb, i, succ)
for i, succ in enumerate(cfg.entry_bb.successors)
)
while len(queue) > 0:
pred, num_output, bb = queue.popleft()
input_row = [
Variable(v.name, v.ty, v.defined_at, None)
for v in pred.sig.output_rows[num_output]
]

if bb in compiled:
# If the BB was already compiled, we just have to check that the signatures
# match.
check_rows_match(input_row, compiled[bb].sig.input_row, bb)
else:
# Otherwise, check the BB and enqueue its successors
checked_bb = check_bb(bb, checked_cfg, input_row, return_ty, globals)
queue += [(checked_bb, i, succ) for i, succ in enumerate(bb.successors)]
compiled[bb] = checked_bb

# Link up BBs in the checked CFG
compiled[bb].predecessors.append(pred)
pred.successors[num_output] = compiled[bb]

checked_cfg.bbs = list(compiled.values())
checked_cfg.exit_bb = compiled[cfg.exit_bb] # TODO: Fails if exit is unreachable
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to catch a KeyError exception here?

Copy link
Collaborator Author

@mark-koch mark-koch Dec 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case should never occur since Guppy doesn't let users write functions that don't return:

def main() -> int:
   while True:
      pass
   return 0  # Unreachable statement, but still required by Guppy

This is because Guppy program analysis doesn't take the semantics of True and False into account. As a result, the exit BB in a CFG will always be reachable in the graph.

In the future, we might allow users to write infinitely looping functions that don't require the return statement at the end. Then we would need to be more careful about this line

checked_cfg.live_before = {compiled[bb]: cfg.live_before[bb] for bb in cfg.bbs}
checked_cfg.ass_before = {compiled[bb]: cfg.ass_before[bb] for bb in cfg.bbs}
checked_cfg.maybe_ass_before = {
compiled[bb]: cfg.maybe_ass_before[bb] for bb in cfg.bbs
}
return checked_cfg


def check_bb(
Expand All @@ -62,4 +119,105 @@ def check_bb(
return_ty: GuppyType,
globals: Globals,
) -> CheckedBB:
raise NotImplementedError
cfg = bb.containing_cfg

# For the entry BB we have to separately check that all used variables are
# defined. For all other BBs, this will be checked when compiling a predecessor.
if bb == cfg.entry_bb:
assert len(bb.predecessors) == 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

failures on asserts should probably raise internal errors.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I typically use asserts as a concise check and documentation of invariants. We could catch AssertionErrors on the top-level and turn them into InternalGuppyErrors?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably fine. ruff tells me it's usually a good idea to avoid them in production code as they might get erased with certain compilation flags. Unsure how much of a concern are they in your case.

for x, use in bb.vars.used.items():
if x not in cfg.ass_before[bb] and x not in globals.values:
raise GuppyError(f"Variable `{x}` is not defined", use)

# Check the basic block
ctx = Context(globals, {v.name: v for v in inputs})
checked_stmts = StmtChecker(ctx, bb, return_ty).check_stmts(bb.statements)

# If we branch, we also have to check the branch predicate
if len(bb.successors) > 1:
assert bb.branch_pred is not None
bb.branch_pred, ty = ExprSynthesizer(ctx).synthesize(bb.branch_pred)
bb.branch_pred, _ = to_bool(bb.branch_pred, ty, ctx)

for succ in bb.successors:
for x, use_bb in cfg.live_before[succ].items():
# Check that the variables requested by the successor are defined
if x not in ctx.locals and x not in ctx.globals.values:
# If the variable is defined on *some* paths, we can give a more
# informative error message
if x in cfg.maybe_ass_before[use_bb]:
# TODO: This should be "Variable x is not defined when coming
# from {bb}". But for this we need a way to associate BBs with
# source locations.
raise GuppyError(
f"Variable `{x}` is not defined on all control-flow paths.",
use_bb.vars.used[x],
)
raise GuppyError(f"Variable `{x}` is not defined", use_bb.vars.used[x])

# We have to check that used linear variables are not being outputted
if x in ctx.locals:
var = ctx.locals[x]
if var.ty.linear and var.used:
raise GuppyError(
f"Variable `{x}` with linear type `{var.ty}` was "
"already used (at {0})",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"already used (at {0})",
"already used (at 0)",

would this mean something different? Confused.

Copy link
Collaborator Author

@mark-koch mark-koch Dec 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this line is a regular string, not an fstring, so it is the actual char sequence "{0}".

When referring to code locations in error messages, we put the format placeholders {0}, {1}, etc and store the corresponding AST nodes in the error, instead of baking the location into the string. The reasoning is that this gives us more flexibility in how we want to display source locations when printing the error

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I must have misread that as part of an f-string. Thanks for the explanation.

cfg.live_before[succ][x].vars.used[x],
[var.used],
)

# On the other hand, unused linear variables *must* be outputted
for x, var in ctx.locals.items():
if var.ty.linear and not var.used and x not in cfg.live_before[succ]:
# TODO: This should be "Variable x with linear type ty is not
# used in {bb}". But for this we need a way to associate BBs with
# source locations.
raise GuppyError(
f"Variable `{x}` with linear type `{var.ty}` is "
"not used on all control-flow paths",
var.defined_at,
)

# Finally, we need to compute the signature of the basic block
outputs = [
[ctx.locals[x] for x in cfg.live_before[succ] if x in ctx.locals]
for succ in bb.successors
]

# Also prepare the successor list so we can fill it in later
checked_bb = CheckedBB(
bb.idx, checked_cfg, checked_stmts, sig=Signature(inputs, outputs)
)
checked_bb.successors = [None] * len(bb.successors) # type: ignore
mark-koch marked this conversation as resolved.
Show resolved Hide resolved
checked_bb.branch_pred = bb.branch_pred
return checked_bb


def check_rows_match(row1: VarRow, row2: VarRow, bb: BB) -> None:
"""Checks that the types of two rows match up.

Otherwise, an error is thrown, alerting the user that a variable has different
types on different control-flow paths.
"""
map1, map2 = {v.name: v for v in row1}, {v.name: v for v in row2}
assert map1.keys() == map2.keys()
for x in map1:
v1, v2 = map1[x], map2[x]
if v1.ty != v2.ty:
# In the error message, we want to mention the variable that was first
# defined at the start.
if (
v1.defined_at
and v2.defined_at
and line_col(v2.defined_at) < line_col(v1.defined_at)
):
v1, v2 = v2, v1
# We shouldn't mention temporary variables (starting with `%`)
# in error messages:
ident = "Expression" if v1.name.startswith("%") else f"Variable `{v1.name}`"
raise GuppyError(
f"{ident} can refer to different types: "
f"`{v1.ty}` (at {{}}) vs `{v2.ty}` (at {{}})",
bb.containing_cfg.live_before[bb][v1.name].vars.used[v1.name],
[v1.defined_at, v2.defined_at],
)
Loading