From a65ecc09b033765f0e4102a0bc1d802de4743de7 Mon Sep 17 00:00:00 2001
From: DavePearce
Date: Thu, 25 Jul 2024 19:52:28 +1200
Subject: [PATCH] Cache Schema Columns

This supports a cache for columns within a schema to avoid expensive
iterator traversals on initialisation. This gives much better startup
performance for (relatively) small traces.
---
 pkg/air/schema.go | 22 ++++++++++++++--------
 pkg/hir/schema.go | 24 +++++++++++++++---------
 pkg/mir/schema.go | 24 +++++++++++++++---------
 3 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/pkg/air/schema.go b/pkg/air/schema.go
index 2f2fcea9..9e16252c 100644
--- a/pkg/air/schema.go
+++ b/pkg/air/schema.go
@@ -43,6 +43,8 @@ type Schema struct {
 	constraints []schema.Constraint
 	// Property assertions.
 	assertions []PropertyAssertion
+	// Cache list of columns declared in inputs and assignments.
+	column_cache []schema.Column
 }
 
 // EmptySchema is used to construct a fresh schema onto which new columns and
@@ -54,6 +56,7 @@ func EmptySchema[C schema.Evaluable]() *Schema {
 	p.assignments = make([]schema.Assignment, 0)
 	p.constraints = make([]schema.Constraint, 0)
 	p.assertions = make([]PropertyAssertion, 0)
+	p.column_cache = make([]schema.Column, 0)
 	// Done
 	return p
 }
@@ -73,9 +76,14 @@ func (p *Schema) AddColumn(context trace.Context, name string, datatype schema.T
 		panic(fmt.Sprintf("invalid module index (%d)", context.Module()))
 	}
 
+	col := assignment.NewDataColumn(context, name, datatype)
 	// NOTE: the air level has no ability to enforce the type specified for a
 	// given column.
-	p.inputs = append(p.inputs, assignment.NewDataColumn(context, name, datatype))
+	p.inputs = append(p.inputs, col)
+	// Update column cache
+	for c := col.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
 	// Calculate column index
 	return uint(len(p.inputs) - 1)
 }
@@ -86,6 +94,10 @@ func (p *Schema) AddColumn(context trace.Context, name string, datatype schema.T
 func (p *Schema) AddAssignment(c schema.Assignment) uint {
 	index := p.Columns().Count()
 	p.assignments = append(p.assignments, c)
+	// Update column cache
+	for c := c.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
 
 	return index
 }
@@ -156,13 +168,7 @@ func (p *Schema) Assignments() util.Iterator[schema.Assignment] {
 // Columns returns an array over the underlying columns of this schema.
 // Specifically, the index of a column in this array is its column index.
 func (p *Schema) Columns() util.Iterator[schema.Column] {
-	inputs := util.NewArrayIterator(p.inputs)
-	is := util.NewFlattenIterator[schema.Declaration, schema.Column](inputs,
-		func(d schema.Declaration) util.Iterator[schema.Column] { return d.Columns() })
-	ps := util.NewFlattenIterator[schema.Assignment, schema.Column](p.Assignments(),
-		func(d schema.Assignment) util.Iterator[schema.Column] { return d.Columns() })
-	//
-	return is.Append(ps)
+	return util.NewArrayIterator(p.column_cache)
 }
 
 // Constraints returns an array over the underlying constraints of this
diff --git a/pkg/hir/schema.go b/pkg/hir/schema.go
index 8f6147fa..b19e59ca 100644
--- a/pkg/hir/schema.go
+++ b/pkg/hir/schema.go
@@ -45,6 +45,8 @@ type Schema struct {
 	constraints []sc.Constraint
 	// The property assertions for this schema.
 	assertions []PropertyAssertion
+	// Cache list of columns declared in inputs and assignments.
+	column_cache []sc.Column
 }
 
 // EmptySchema is used to construct a fresh schema onto which new columns and
@@ -56,6 +58,7 @@ func EmptySchema() *Schema {
 	p.assignments = make([]sc.Assignment, 0)
 	p.constraints = make([]sc.Constraint, 0)
 	p.assertions = make([]PropertyAssertion, 0)
+	p.column_cache = make([]sc.Column, 0)
 	// Done
 	return p
 }
@@ -76,7 +79,12 @@ func (p *Schema) AddDataColumn(context trace.Context, name string, base sc.Type)
 	}
 
 	cid := uint(len(p.inputs))
-	p.inputs = append(p.inputs, assignment.NewDataColumn(context, name, base))
+	col := assignment.NewDataColumn(context, name, base)
+	p.inputs = append(p.inputs, col)
+	// Update column cache
+	for c := col.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
 
 	return cid
 }
@@ -101,7 +109,11 @@ func (p *Schema) AddLookupConstraint(handle string, source trace.Context, target
 func (p *Schema) AddAssignment(c sc.Assignment) uint {
 	index := p.Columns().Count()
 	p.assignments = append(p.assignments, c)
-
+	// Update column cache
+	for c := c.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
+	// Done
 	return index
 }
 
@@ -151,13 +163,7 @@ func (p *Schema) Assignments() util.Iterator[sc.Assignment] {
 // Columns returns an array over the underlying columns of this sc.
 // Specifically, the index of a column in this array is its column index.
 func (p *Schema) Columns() util.Iterator[sc.Column] {
-	inputs := util.NewArrayIterator(p.inputs)
-	is := util.NewFlattenIterator[sc.Declaration, sc.Column](inputs,
-		func(d sc.Declaration) util.Iterator[sc.Column] { return d.Columns() })
-	ps := util.NewFlattenIterator[sc.Assignment, sc.Column](p.Assignments(),
-		func(d sc.Assignment) util.Iterator[sc.Column] { return d.Columns() })
-	//
-	return is.Append(ps)
+	return util.NewArrayIterator(p.column_cache)
 }
 
 // Constraints returns an array over the underlying constraints of this
diff --git a/pkg/mir/schema.go b/pkg/mir/schema.go
index 82df9b31..6f66c117 100644
--- a/pkg/mir/schema.go
+++ b/pkg/mir/schema.go
@@ -46,6 +46,8 @@ type Schema struct {
 	constraints []schema.Constraint
 	// The property assertions for this schema.
 	assertions []PropertyAssertion
+	// Cache list of columns declared in inputs and assignments.
+	column_cache []schema.Column
 }
 
 // EmptySchema is used to construct a fresh schema onto which new columns and
@@ -57,6 +59,7 @@ func EmptySchema() *Schema {
 	p.assignments = make([]schema.Assignment, 0)
 	p.constraints = make([]schema.Constraint, 0)
 	p.assertions = make([]PropertyAssertion, 0)
+	p.column_cache = make([]schema.Column, 0)
 	// Done
 	return p
 }
@@ -74,8 +77,13 @@ func (p *Schema) AddDataColumn(context trace.Context, name string, base schema.T
 	if context.Module() >= uint(len(p.modules)) {
 		panic(fmt.Sprintf("invalid module index (%d)", context.Module()))
 	}
-
-	p.inputs = append(p.inputs, assignment.NewDataColumn(context, name, base))
+	// Create column
+	col := assignment.NewDataColumn(context, name, base)
+	p.inputs = append(p.inputs, col)
+	// Update column cache
+	for c := col.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
 }
 
 // AddAssignment appends a new assignment (i.e. set of computed columns) to be
@@ -84,6 +92,10 @@ func (p *Schema) AddDataColumn(context trace.Context, name string, base schema.T
 func (p *Schema) AddAssignment(c schema.Assignment) uint {
 	index := p.Columns().Count()
 	p.assignments = append(p.assignments, c)
+	// Update column cache
+	for c := c.Columns(); c.HasNext(); {
+		p.column_cache = append(p.column_cache, c.Next())
+	}
 
 	return index
 }
@@ -147,13 +159,7 @@ func (p *Schema) Assignments() util.Iterator[schema.Assignment] {
 // Columns returns an array over the underlying columns of this schema.
 // Specifically, the index of a column in this array is its column index.
 func (p *Schema) Columns() util.Iterator[schema.Column] {
-	inputs := util.NewArrayIterator(p.inputs)
-	is := util.NewFlattenIterator[schema.Declaration, schema.Column](inputs,
-		func(d schema.Declaration) util.Iterator[schema.Column] { return d.Columns() })
-	ps := util.NewFlattenIterator[schema.Assignment, schema.Column](p.Assignments(),
-		func(d schema.Assignment) util.Iterator[schema.Column] { return d.Columns() })
-	//
-	return is.Append(ps)
+	return util.NewArrayIterator(p.column_cache)
 }
 
 // Constraints returns an array over the underlying constraints of this