Skip to content

Commit

Permalink
Add metadata reader benchmarks
Browse files Browse the repository at this point in the history
Signed-off-by: Yasin Turan <[email protected]>
  • Loading branch information
turan18 committed Feb 1, 2024
1 parent db7df3a commit c7bfdda
Show file tree
Hide file tree
Showing 5 changed files with 440 additions and 187 deletions.
8 changes: 4 additions & 4 deletions metadata/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import (
"github.com/awslabs/soci-snapshotter/ztoc/compression"
)

// Attr reprensents the attributes of a node.
// Attr represents the attributes of a node.
type Attr struct {
// Size, for regular files, is the logical size of the file.
Size int64
Expand Down Expand Up @@ -102,7 +102,7 @@ type Options struct {
Telemetry *Telemetry
}

// Option is an option to configure the behaviour of reader.
// Option is an option to configure the behavior of reader.
type Option func(o *Options) error

// WithTelemetry option specifies the telemetry hooks
Expand All @@ -113,10 +113,10 @@ func WithTelemetry(telemetry *Telemetry) Option {
}
}

// A func which takes start time and records the diff
// MeasureLatencyHook is a func which takes start time and records the diff
type MeasureLatencyHook func(time.Time)

// A struct which defines telemetry hooks. By implementing these hooks you should be able to record
// Telemetry defines telemetry hooks. By implementing these hooks you should be able to record
// the latency metrics of the respective steps of SOCI open operation.
type Telemetry struct {
InitMetadataStoreLatency MeasureLatencyHook // measure time to initialize metadata store (in milliseconds)
Expand Down
260 changes: 233 additions & 27 deletions metadata/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,47 +33,253 @@
package metadata

import (
"io"
"compress/gzip"
"fmt"
_ "net/http/pprof"
"os"
"testing"
"time"

"github.com/awslabs/soci-snapshotter/util/testutil"
"github.com/awslabs/soci-snapshotter/ztoc"
bolt "go.etcd.io/bbolt"
"golang.org/x/sync/errgroup"
)

var (
	// allowedPrefix lists the path prefixes that tar entry names are built
	// with when the reader tests generate their input archives.
	allowedPrefix = [...]string{"", "./", "/", "../"}

	// srcCompressions maps a human-readable label (used in subtest names)
	// to the compress/gzip level used to compress the source archive.
	srcCompressions = map[string]int{
		"gzip-nocompression":      gzip.NoCompression,
		"gzip-bestspeed":          gzip.BestSpeed,
		"gzip-bestcompression":    gzip.BestCompression,
		"gzip-defaultcompression": gzip.DefaultCompression,
		"gzip-huffmanonly":        gzip.HuffmanOnly,
	}
)

// TestMetadataReader is a table-driven test for the metadata reader.
//
// Every table entry describes a set of tar entries (in) and the checks the
// resulting node tree must satisfy (want). Each entry is exercised once per
// combination of path prefix in allowedPrefix and gzip level in
// srcCompressions: the entries are turned into a ztoc with
// ztoc.BuildZtocReader, a reader is created over its TOC with
// newTestableReader, the node tree is logged, and every check is run.
func TestMetadataReader(t *testing.T) {
	// NOTE(review): testReader is not defined in the visible source and this
	// call sits next to an inline copy of the table-driven test below —
	// confirm both are intended.
	testReader(t, newTestableReader)
	// Truncated to whole seconds before being used as an expected mod time.
	sampleTime := time.Now().Truncate(time.Second)
	tests := []struct {
		name string
		in   []testutil.TarEntry
		want []check
	}{
		{
			// Regular files carrying mode, owner, mod time and xattr attributes.
			name: "files",
			in: []testutil.TarEntry{
				testutil.File("foo", "foofoo", testutil.WithFileMode(0644|os.ModeSetuid)),
				testutil.Dir("bar/"),
				testutil.File("bar/baz.txt", "bazbazbaz", testutil.WithFileOwner(1000, 1000)),
				testutil.File("xxx.txt", "xxxxx", testutil.WithFileModTime(sampleTime)),
				testutil.File("y.txt", "", testutil.WithFileXattrs(map[string]string{"testkey": "testval"})),
			},
			want: []check{
				numOfNodes(6), // root dir + 1 dir + 4 files
				hasFile("foo", 6),
				hasMode("foo", 0644|os.ModeSetuid),
				hasFile("bar/baz.txt", 9),
				hasOwner("bar/baz.txt", 1000, 1000),
				hasFile("xxx.txt", 5),
				hasModTime("xxx.txt", sampleTime),
				hasFile("y.txt", 0),
				// For details on the keys of Xattrs, see https://pkg.go.dev/archive/tar#Header
				hasXattrs("y.txt", map[string]string{"testkey": "testval"}),
			},
		},
		{
			// Nested directories carrying mode, owner, mod time and xattr attributes.
			name: "dirs",
			in: []testutil.TarEntry{
				testutil.Dir("foo/", testutil.WithDirMode(os.ModeDir|0600|os.ModeSticky)),
				testutil.Dir("foo/bar/", testutil.WithDirOwner(1000, 1000)),
				testutil.File("foo/bar/baz.txt", "testtest"),
				testutil.File("foo/bar/xxxx", "x"),
				testutil.File("foo/bar/yyy", "yyy"),
				testutil.Dir("foo/a/", testutil.WithDirModTime(sampleTime)),
				testutil.Dir("foo/a/1/", testutil.WithDirXattrs(map[string]string{"testkey": "testval"})),
				testutil.File("foo/a/1/2", "1111111111"),
			},
			want: []check{
				numOfNodes(9), // root dir + 4 dirs + 4 files
				hasDirChildren("foo", "bar", "a"),
				hasDirChildren("foo/bar", "baz.txt", "xxxx", "yyy"),
				hasDirChildren("foo/a", "1"),
				hasDirChildren("foo/a/1", "2"),
				hasMode("foo", os.ModeDir|0600|os.ModeSticky),
				hasOwner("foo/bar", 1000, 1000),
				hasModTime("foo/a", sampleTime),
				hasXattrs("foo/a/1", map[string]string{"testkey": "testval"}),
				hasFile("foo/bar/baz.txt", 8),
				hasFile("foo/bar/xxxx", 1),
				hasFile("foo/bar/yyy", 3),
				hasFile("foo/a/1/2", 10),
			},
		},
		{
			// Hard links (including a link to a link) plus one symlink.
			name: "hardlinks",
			in: []testutil.TarEntry{
				testutil.File("foo", "foofoo", testutil.WithFileOwner(1000, 1000)),
				testutil.Dir("bar/"),
				testutil.Link("bar/foolink", "foo"),
				testutil.Link("bar/foolink2", "bar/foolink"),
				testutil.Dir("bar/1/"),
				testutil.File("bar/1/baz.txt", "testtest"),
				testutil.Link("barlink", "bar/1/baz.txt"),
				testutil.Symlink("foosym", "bar/foolink2"),
			},
			want: []check{
				numOfNodes(6), // root dir + 2 dirs + 1 file(linked) + 1 file(linked) + 1 symlink
				hasFile("foo", 6),
				hasOwner("foo", 1000, 1000),
				hasFile("bar/foolink", 6),
				hasOwner("bar/foolink", 1000, 1000),
				hasFile("bar/foolink2", 6),
				hasOwner("bar/foolink2", 1000, 1000),
				hasFile("bar/1/baz.txt", 8),
				hasFile("barlink", 8),
				hasDirChildren("bar", "foolink", "foolink2", "1"),
				hasDirChildren("bar/1", "baz.txt"),
				sameNodes("foo", "bar/foolink", "bar/foolink2"),
				sameNodes("bar/1/baz.txt", "barlink"),
				linkName("foosym", "bar/foolink2"),
				hasNumLink("foo", 3),     // parent dir + 2 links
				hasNumLink("barlink", 2), // parent dir + 1 link
				hasNumLink("bar", 3),     // parent + "." + child's ".."
			},
		},
		{
			// A file with a non-canonical path plus character/block devices and a fifo.
			name: "various files",
			in: []testutil.TarEntry{
				testutil.Dir("bar/"),
				testutil.File("bar/../bar///////////////////foo", ""),
				testutil.Chardev("bar/cdev", 10, 11),
				testutil.Blockdev("bar/bdev", 100, 101),
				testutil.Fifo("bar/fifo"),
			},
			want: []check{
				numOfNodes(6), // root dir + 1 file + 1 dir + 1 cdev + 1 bdev + 1 fifo
				hasFile("bar/foo", 0),
				hasChardev("bar/cdev", 10, 11),
				hasBlockdev("bar/bdev", 100, 101),
				hasFifo("bar/fifo"),
			},
		},
	}
	for _, tt := range tests {
		for _, prefix := range allowedPrefix {
			// Per-iteration copy of the loop variable for use inside the closure below.
			prefix := prefix
			for srcCompresionName, srcCompression := range srcCompressions {
				// The subtest name is built from the case name and the compression
				// label only; the prefix is not part of it.
				t.Run(tt.name+"-"+srcCompresionName, func(t *testing.T) {
					opts := []testutil.BuildTarOption{
						testutil.WithPrefix(prefix),
					}

					// From here on the local variable ztoc shadows the ztoc package.
					// NOTE(review): 64 is presumably the span size — confirm against BuildZtocReader.
					ztoc, sr, err := ztoc.BuildZtocReader(t, tt.in, srcCompression, 64, opts...)
					if err != nil {
						t.Fatalf("failed to build ztoc: %v", err)
					}
					// checkCalled is invoked after the checks; a non-nil error from it
					// is reported as a telemetry failure.
					telemetry, checkCalled := newCalledTelemetry()

					// create a metadata reader
					r, err := newTestableReader(sr, ztoc.TOC, WithTelemetry(telemetry))
					if err != nil {
						t.Fatalf("failed to create new reader: %v", err)
					}
					defer r.Close()
					// Log the node tree to help diagnose failing checks.
					t.Logf("vvvvv Node tree vvvvv")
					t.Logf("[%d] ROOT", r.RootID())
					dumpNodes(t, r, r.RootID(), 1)
					t.Logf("^^^^^^^^^^^^^^^^^^^^^")
					for _, want := range tt.want {
						want(t, r)
					}
					if err := checkCalled(); err != nil {
						t.Errorf("telemetry failure: %v", err)
					}
				})
			}
		}
	}
}

func newTestableReader(sr *io.SectionReader, toc ztoc.TOC, opts ...Option) (testableReader, error) {
f, err := os.CreateTemp("", "readertestdb")
// BenchmarkMetadataReader measures NewReader for TOCs of increasing size.
//
// Each case gets its own temporary database from newTempDB and a TOC built
// by generateTOC with the configured number of entries. Only the NewReader
// calls inside the sub-benchmark loop are timed; the setup happens before
// b.Run starts the sub-benchmark's timer.
func BenchmarkMetadataReader(b *testing.B) {
	testCases := []struct {
		name    string
		entries int
	}{
		{
			name:    "Create metadata.Reader with few TOC entries",
			entries: 1000,
		},
		{
			name:    "Create metadata.Reader with a good amount TOC entries",
			entries: 10_000,
		},
		{
			name:    "Create metadata.Reader with many TOC entries",
			entries: 50_000,
		},
		{
			name:    "Create metadata.Reader with an enormous amount of TOC entries",
			entries: 100_000,
		},
	}

	for _, tc := range testCases {
		// Each case runs in its own function scope so that its deferred
		// cleanup fires as soon as the case is done, instead of keeping every
		// temporary database alive until the whole benchmark returns.
		func() {
			tempDB, clean, err := newTempDB()
			if err != nil {
				b.Fatalf("failed to initialize temp db: %v", err)
			}
			// Register the cleanup only once newTempDB is known to have succeeded.
			defer clean()

			toc, err := generateTOC(tc.entries)
			if err != nil {
				b.Fatalf("failed to generate TOC: %v", err)
			}

			// No ResetTimer on the parent: a benchmark that calls Run is not
			// measured itself, and the sub-benchmark's timer only starts once
			// the function passed to Run begins.
			b.Run(tc.name, func(b *testing.B) {
				for i := 0; i < b.N; i++ {
					if _, err := NewReader(tempDB, nil, toc); err != nil {
						b.Fatalf("failed to create new reader: %v", err)
					}
				}
			})
		}()
	}
}

func BenchmarkConcurrentMetadataReader(b *testing.B) {
smallTOC, err := generateTOC(1000)
if err != nil {
return nil, err
b.Fatalf("failed to generate TOC: %v", err)
}
defer os.Remove(f.Name())
db, err := bolt.Open(f.Name(), 0600, nil)
mediumTOC, err := generateTOC(10_000)
if err != nil {
return nil, err
b.Fatalf("failed to generate TOC: %v", err)
}
r, err := NewReader(db, sr, toc, opts...)
largeTOC, err := generateTOC(50_000)
if err != nil {
return nil, err
b.Fatalf("failed to generate TOC: %v", err)
}
return &testableReadCloser{
testableReader: r.(*reader),
closeFn: func() error {
db.Close()
return os.Remove(f.Name())
},
}, nil
}

type testableReadCloser struct {
testableReader
closeFn func() error
}
tempDB, clean, err := newTempDB()
defer clean()
if err != nil {
b.Fatalf("failed to initialize temp db: %v", err)
}
tocs := []ztoc.TOC{smallTOC, mediumTOC, largeTOC}
var eg errgroup.Group
b.ResetTimer()
b.Run("Write small, medium and large TOC concurrently", func(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, toc := range tocs {
toc := toc
eg.Go(func() error {
if _, err := NewReader(tempDB, nil, toc); err != nil {
return fmt.Errorf("failed to create new reader: %v", err)
}
return nil
})
}
if err := eg.Wait(); err != nil {
b.Fatal(err)
}

func (r *testableReadCloser) Close() error {
r.closeFn()
return r.testableReader.Close()
}
})
}
Loading

0 comments on commit c7bfdda

Please sign in to comment.