Skip to content

Commit

Permalink
Support synchronization points at fixed offsets, without pattern search.
Browse files Browse the repository at this point in the history
If a synchronization point (i.e., a field with `&synchronize`) can be
determined to reside at a fixed offset from either the start of a unit
or a previous synchronization point, we can now leverage that after an
error to jump there directly to resume parsing.
  • Loading branch information
rsmmr committed Dec 20, 2024
1 parent 1b6b08b commit 6c277fc
Show file tree
Hide file tree
Showing 10 changed files with 134 additions and 34 deletions.
1 change: 1 addition & 0 deletions hilti/toolchain/include/ast/builder/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ class Builder : public builder::NodeFactory {
auto division(Expression* op1, Expression* op2, const Meta& m = Meta()) {
return expressionUnresolvedOperator(operator_::Kind::Division, {op1, op2}, m);
}

auto multiple(Expression* op1, Expression* op2, const Meta& m = Meta()) {
return expressionUnresolvedOperator(operator_::Kind::Multiple, {op1, op2}, m);
}
Expand Down
8 changes: 6 additions & 2 deletions spicy/toolchain/include/ast/types/unit-items/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ class Field : public unit::Item {
/** Get the `&convert` expression, if any. */
std::optional<std::pair<Expression*, QualifiedType*>> convertExpression() const;

/**
* Returns an expression representing the number of bytes the field
* consumes, if known.
*/
Expression* parseSize(Builder* builder) const;

void setForwarding(bool is_forwarding) { _is_forwarding = is_forwarding; }
void setTransient(bool is_transient) { _is_transient = is_transient; }
void setDDType(ASTContext* ctx, QualifiedType* t);
Expand All @@ -88,8 +94,6 @@ class Field : public unit::Item {

QualifiedType* itemType() const final { return child<QualifiedType>(3); }

Expression* parseSize(Builder* builder) const;

bool isResolved(hilti::node::CycleDetector* cd) const final {
return type() || item() || itemType()->isResolved(cd);
}
Expand Down
18 changes: 11 additions & 7 deletions spicy/toolchain/include/compiler/detail/codegen/production.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,16 +180,20 @@ class Production {
virtual QualifiedType* type() const { return nullptr; };

/**
* Returns an expression yielding the raw number of bytes that parsing this
* production will consume. The expression must evaluate to a constant
* value of type `uint64`. Returns null if the size isn't constant, or
* cannot be computed.
* Returns an expression representing the number of bytes the production
* consumes, if known. Returns null if the size cannot be computed.
*
* This does not take any field parsing attributes into account (e.g.,
* `&size`). It's up to the caller to apply those if needed.
* The resulting expression does not take any type-independent field
* parsing attributes into account (e.g., `&size`). It's up to the caller
* to apply those if needed. However, the expression will (and must) take
* type-specific attributes into account (e.g., `&ipv4` for an address).
*
* Note that the returned expression may not be a constant: it can depend
* on other values not known at compile time.
*
* @param builder builder to use for creating the expression
* @return expression yielding the size, or null if not available
* @return expression yielding the size, or null if not available; must
* evaluate to a value of type `uint64`.
*/
virtual Expression* parseSize(Builder* builder) const = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,8 @@ class Counter : public Production {
Expression* expression() const final { return _expression; }

std::vector<std::vector<Production*>> rhss() const final { return {{_body.get()}}; };
Expression* parseSize(Builder* builder) const final {
if ( ! _expression->isConstant() )
return nullptr;

Expression* parseSize(Builder* builder) const final {
auto size = _body->parseSize(builder);
if ( ! size )
return nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
#include <string>
#include <utility>

#include <hilti/ast/ctors/integer.h>
#include <hilti/ast/expressions/ctor.h>

#include <spicy/ast/builder/builder.h>
#include <spicy/compiler/detail/codegen/production.h>
#include <spicy/compiler/detail/codegen/productions/visitor.h>
Expand Down
4 changes: 2 additions & 2 deletions spicy/toolchain/src/ast/types/unit-items/field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ struct SizeVisitor : hilti::visitor::PreOrder {
hilti::rt::cannot_be_reached();
}

void operator()(hilti::type::SignedInteger* n) final { result = builder->integer(n->width() / 8U); }
void operator()(hilti::type::UnsignedInteger* n) final { result = builder->integer(n->width() / 8U); }
void operator()(hilti::type::Bitfield* n) final { result = builder->integer(n->width() / 8U); }

void operator()(hilti::type::Real*) final {
Expand All @@ -58,6 +56,8 @@ struct SizeVisitor : hilti::visitor::PreOrder {
builder->integer(4U), builder->integer(8U));
}

void operator()(hilti::type::SignedInteger* n) final { result = builder->integer(n->width() / 8U); }
void operator()(hilti::type::UnsignedInteger* n) final { result = builder->integer(n->width() / 8U); }
void operator()(hilti::type::Void* n) final { result = builder->integer(0U); }
};

Expand Down
60 changes: 45 additions & 15 deletions spicy/toolchain/src/compiler/codegen/parser-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1664,13 +1664,6 @@ struct ProductionVisitor : public production::Visitor {
Expression* parseSizeOfSynchronizationGroup(const production::Unit* p, const std::vector<uint64_t>& field_indices) {
Expression* group_size = nullptr;

auto add_to_group_size = [&group_size, this](Expression* size) {
if ( group_size )
group_size = builder()->sum(group_size, size);
else
group_size = size;
};

for ( auto i : field_indices ) {
const auto& field_production = p->fields()[i];

Expand All @@ -1682,21 +1675,35 @@ struct ProductionVisitor : public production::Visitor {
if ( field_attributes->has(hilti::Attribute::Kind::ParseFrom) ||
field_attributes->has(hilti::Attribute::Kind::ParseAt) ) {
// These don't affect the size of the group.
add_to_group_size(builder()->integer(0));
if ( ! group_size )
group_size = builder()->integer(0);

continue;
}

if ( field_attributes->has(hilti::Attribute::Kind::Eod) )
// Cannot determine the size of the group.
return nullptr;

Expression* field_parse_size = nullptr;

if ( auto* size_attr = field_attributes->find(hilti::Attribute::Kind::Size) )
add_to_group_size(*size_attr->valueAsExpression());
field_parse_size = *size_attr->valueAsExpression();
else if ( auto* production_size = field_production->parseSize(builder()) )
add_to_group_size(production_size);
field_parse_size = production_size;
else
// Cannot determine the size of the group.
return nullptr;

if ( ! field_parse_size->isConstant() )
// Size of the group isn't fixed, meaning the amount may differ
// depending on when we evaluate it.
return nullptr;

if ( group_size )
group_size = builder()->sum(group_size, field_parse_size);
else
group_size = field_parse_size;
}

return group_size;
Expand Down Expand Up @@ -1763,6 +1770,7 @@ struct ProductionVisitor : public production::Visitor {

// Group adjacent fields with same sync point.
std::vector<std::pair<std::vector<uint64_t>, std::optional<uint64_t>>> groups;

for ( uint64_t i = 0; i < sync_points.size(); ++i ) {
const auto& sync_point = sync_points[i];
if ( ! groups.empty() && groups.back().second == sync_point )
Expand Down Expand Up @@ -1794,6 +1802,12 @@ struct ProductionVisitor : public production::Visitor {
parseField(p->fields()[field]);

else {
// Determine if the group has a fixed size. If so, remember it's start position.
Expression* group_begin = nullptr;
Expression* group_size = parseSizeOfSynchronizationGroup(p, fields);
if ( group_size )
group_begin = builder()->addTmp("sync_group_begin", state().cur);

auto try_ = builder()->addTry();

pushBuilder(try_.first, [&]() {
Expand All @@ -1804,18 +1818,34 @@ struct ProductionVisitor : public production::Visitor {
pushBuilder(try_.second.addCatch(
builder()->parameter(ID("e"), builder()->typeName("hilti::RecoverableFailure"))),
[&]() {
// There is a sync point; run its production w/o consuming input until parsing
// succeeds or we run out of data.
builder()->addDebugMsg("spicy-verbose",
fmt("failed to parse, will try to synchronize at '%s'",
p->fields()[*sync_point]->meta().field()->id()));

// Remember the original error so we can report it in case the sync failed.
// Remember the original error so we can report it in case the sync never gets
// confirmed. This is also what marks that we are in trial mode.
builder()->addAssign(state().error, builder()->id("e"));

if ( group_size ) {
pb->beforeHook();
builder()->addDebugMsg("spicy-verbose", "successfully synchronized");
builder()->addMemberCall(state().self, "__on_0x25_synced", {}, p->location());
pb->afterHook();
}
});

startSynchronize(*p->fields()[*sync_point]);
++trial_loops;
if ( group_size ) {
// When the whole group is of fixed size, we know directly where
// to continue parsing, and can just jump there. We prefer this
// over pattern-based synchronization.
builder()->addAssign(state().cur, builder()->memberCall(group_begin, "advance", {group_size}));
pb->trimInput();
}
else {
// Start searching for the sync point.
startSynchronize(*p->fields()[*sync_point]);
++trial_loops;
}
}
}

Expand Down
18 changes: 18 additions & 0 deletions tests/Baseline/spicy.types.unit.synchronize-fixed-size/output
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
[$xa=[$x1=b"123", $x2=b"45"], $xb=[$x1=b"123", $x2=b"45"], $y=[$y1=b"ab", $y2=25444, $y3=101], $z=b"DONE"]
===
* synchronized
* confirmed
[$xa=[$x1=b"12.", $x2=(not set)], $xb=(not set), $y=[$y1=b"ab", $y2=25444, $y3=101], $z=b"DONE"]
===
* synchronized
[$xa=[$x1=b"12.", $x2=(not set)], $xb=(not set), $y=[$y1=b"ab", $y2=25444, $y3=101], $z=b"DONE"]
[error] terminating with uncaught exception of type spicy::rt::ParseError: successful synchronization never confirmed: &requires failed: ($$ == b"123") (<...>/synchronize-fixed-size.spicy:19:23-19:46)
===
* synchronized
* confirmed
[$xa=[$x1=b"123", $x2=b"45"], $xb=[$x1=b"12.", $x2=(not set)], $y=[$y1=b"ab", $y2=25444, $y3=101], $z=b"DONE"]
===
===
[error] terminating with uncaught exception of type spicy::rt::ParseError: expected bytes literal "DONE" but input starts with "DO.E" (<...>/synchronize-fixed-size.spicy:33:8-33:14)
===
4 changes: 2 additions & 2 deletions tests/spicy/attributes/synchronize.spicy
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
module test;

type A = unit {
: uint8;
: bytes &until=b"\x00";
: uint8 &synchronize;
};
# @TEST-END-FILE
Expand All @@ -21,7 +21,7 @@ type A = unit {
};

type B = unit {
: uint8;
: bytes &until=b"\x00";
: A &synchronize;
};
# @TEST-END-FILE
48 changes: 48 additions & 0 deletions tests/spicy/types/unit/synchronize-fixed-size.spicy
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# @TEST-EXEC: printf 1234512345abcdeDONEc | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# @TEST-EXEC: printf 12.4512345abcdeDONEc | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# @TEST-EXEC-FAIL: printf 12.4512345abcdeDONE- | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# @TEST-EXEC: printf 1234512.45abcdeDONEc | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# TEST-EXEC: printf 1234512345ab.deDONEc | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# @TEST-EXEC-FAIL: printf 1234512345abcdeDO.E- | spicy-driver -d %INPUT >>output 2>&1
# @TEST-EXEC: echo === >>output
# @TEST-EXEC: btest-diff output
#
# @TEST-DOC: Check that we can synchronize after errors in fixed-size fields.
module Test;

type X = unit {
x1: bytes &size=3 &requires=($$ == b"123");
x2: bytes &eod;
};

type Y = unit {
y1: b"ab";
y2: uint16;
y3: uint8;
};

public type Foo = unit {
xa: X &size=5;
xb: X &size=5;
y: Y &synchronize;
z: b"DONE";

: bytes &size=1 { if ( $$ == b"c" ) confirm; }

on %synced {
print "* synchronized";
}

on %confirmed {
print "* confirmed";
}

on %done {
print self;
}
};

0 comments on commit 6c277fc

Please sign in to comment.