From bbf695e435beab37ec7b8c91e0656a42950c7477 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Tue, 14 Nov 2023 08:44:31 -0700 Subject: [PATCH 01/21] Integrate UPF feature and tests Signed-off-by: char-1ee --- Makefile | 7 ++----- bin/default-rootfs.img | 4 ++-- ctriface/Makefile | 7 ++----- ctriface/iface_test.go | 11 ----------- go.mod | 2 +- go.sum | 4 ++-- vhive.go | 4 ---- vhive_test.go | 5 ----- 8 files changed, 9 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index 8e4f6f655..8ed8d005f 100644 --- a/Makefile +++ b/Makefile @@ -24,11 +24,8 @@ SUBDIRS:=ctriface taps misc profile EXTRAGOARGS:=-v -race -cover EXTRAGOARGS_NORACE:=-v EXTRATESTFILES:=vhive_test.go stats.go vhive.go functions.go -# User-level page faults are temporarily disabled (gh-807) -# WITHUPF:=-upfTest -# WITHLAZY:=-lazyTest -WITHUPF:= -WITHLAZY:= +WITHUPF:=-upfTest +WITHLAZY:=-lazyTest WITHSNAPSHOTS:=-snapshotsTest CTRDLOGDIR:=/tmp/ctrd-logs diff --git a/bin/default-rootfs.img b/bin/default-rootfs.img index 2a4d5e66a..36691dac7 100644 --- a/bin/default-rootfs.img +++ b/bin/default-rootfs.img @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58d378a908efd9b604da4659c64c81cb835932a00d382c10b0e2d0d8770fc7d7 -size 64409600 +oid sha256:e7f5439985a3bc40e6257e3da089e85cc9235f59402f0982bba3587b89ec4afe +size 64577536 diff --git a/ctriface/Makefile b/ctriface/Makefile index bc8f6fcd3..6d2811be7 100644 --- a/ctriface/Makefile +++ b/ctriface/Makefile @@ -23,11 +23,8 @@ EXTRAGOARGS:=-v -race -cover EXTRATESTFILES:=iface_test.go iface.go orch_options.go orch.go BENCHFILES:=bench_test.go iface.go orch_options.go orch.go -# User-level page faults are temporarily disabled (gh-807) -# WITHUPF:=-upf -# WITHLAZY:=-lazy -WITHUPF:= -WITHLAZY:= +WITHUPF:=-upf +WITHLAZY:=-lazy GOBENCH:=-v -timeout 1500s CTRDLOGDIR:=/tmp/ctrd-logs diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 7e678c6eb..ee50dec97 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -45,17 +45,6 @@ var ( isWithCache = flag.Bool("withCache", false, "Do not drop the cache before measurements") ) -func TestMain(m *testing.M) { - flag.Parse() - - if *isUPFEnabled { - log.Error("User-level page faults are temporarily disabled (gh-807)") - os.Exit(-1) - } - - os.Exit(m.Run()) -} - func TestPauseSnapResume(t *testing.T) { log.SetFormatter(&log.TextFormatter{ TimestampFormat: ctrdlog.RFC3339NanoFixed, diff --git a/go.mod b/go.mod index 09fb95f9e..cd66b4676 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,7 @@ replace ( ) replace ( - github.com/firecracker-microvm/firecracker-containerd => github.com/vhive-serverless/firecracker-containerd v0.0.0-20230912063208-ad6383f05e45 + github.com/firecracker-microvm/firecracker-containerd => github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 github.com/vhive-serverless/vhive/examples/protobuf/helloworld => ./examples/protobuf/helloworld ) diff --git a/go.sum b/go.sum index c5ed066b7..fd6b5eec0 100644 --- a/go.sum +++ b/go.sum @@ -147,6 +147,8 @@ github.com/cespare/prettybench v0.0.0-20150116022406-03b8cfe5406c/go.mod h1:Xe6Z github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5/go.mod h1:/iP1qXHoty45bqomnu2LM+VVyAEdWN+vtSHGlQgyxbw= +github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 h1:InrwKCxhDU1PJTNJ0wOHM/PvsIruaz2HriViJ5swrX4= +github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134/go.mod h1:XC5a/4PWbzipD5Ron745odZxoVy/J6d8xFldwTZJbSU= github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b/go.mod h1:TrMrLQfeENAPYPRsJuq3jsqdlRh3lvi6trTZJG8+tho= github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= @@ -1022,8 +1024,6 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk8LWSxF3s= github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= -github.com/vhive-serverless/firecracker-containerd v0.0.0-20230912063208-ad6383f05e45 h1:B+2NmtrRoWgfYkaqqG9Dyqud5HRjfibFpB8wbqER/PQ= -github.com/vhive-serverless/firecracker-containerd v0.0.0-20230912063208-ad6383f05e45/go.mod h1:XC5a/4PWbzipD5Ron745odZxoVy/J6d8xFldwTZJbSU= github.com/vishvananda/netlink v0.0.0-20171020171820-b2de5d10e38e/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= diff --git a/vhive.go b/vhive.go index 829b0eed0..506d17262 100644 --- a/vhive.go +++ b/vhive.go @@ -91,10 +91,6 @@ func main() { return } - if *isUPFEnabled { - log.Error("User-level page faults are temporarily disabled (gh-807)") - return - } if *isUPFEnabled && !*isSnapshotsEnabled { log.Error("User-level page faults are not supported without snapshots") diff --git a/vhive_test.go b/vhive_test.go index 9a7947f84..abf45e327 100644 --- a/vhive_test.go +++ b/vhive_test.go @@ -65,11 +65,6 @@ func TestMain(m *testing.M) { flag.Parse() - if *isUPFEnabledTest { - log.Error("User-level page faults are temporarily disabled (gh-807)") - os.Exit(-1) - } - log.Infof("Orchestrator snapshots enabled: %t", *isSnapshotsEnabledTest) log.Infof("Orchestrator UPF enabled: %t", *isUPFEnabledTest) log.Infof("Orchestrator lazy serving mode enabled: %t", *isLazyModeTest) From 9fd3347164aad66e71e28135b108d5359a71b92b Mon Sep 17 00:00:00 2001 From: char-1ee Date: Wed, 22 Nov 2023 09:41:08 -0700 Subject: [PATCH 02/21] Apply gofmt formatting Signed-off-by: char-1ee --- ctriface/iface_test.go | 5 +++++ vhive.go | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index ee50dec97..2f1526c9a 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -45,6 +45,11 @@ var ( isWithCache = flag.Bool("withCache", false, "Do not drop the cache before measurements") ) +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + func TestPauseSnapResume(t *testing.T) { log.SetFormatter(&log.TextFormatter{ TimestampFormat: ctrdlog.RFC3339NanoFixed, diff --git a/vhive.go b/vhive.go index 506d17262..d829f63be 100644 --- a/vhive.go +++ b/vhive.go @@ -91,7 +91,6 @@ func main() { return } - if *isUPFEnabled && !*isSnapshotsEnabled { log.Error("User-level page faults are not supported without snapshots") return From 7c815fb3ab37f82664d6991a75a32fdf9955145f Mon Sep 17 00:00:00 2001 From: char-1ee Date: Sun, 10 Dec 2023 18:59:51 -0700 Subject: [PATCH 03/21] Fix UPF legacy Signed-off-by: char-1ee --- ctriface/iface.go | 18 ++++++++++++++++-- memory/manager/manager.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index d6e79a114..498261a7e 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -64,6 +64,8 @@ type StartVMResponse struct { const ( testImageName = "ghcr.io/ease-lab/helloworld:var_workload" + fileBackend = "File" + uffdBackend = "Uffd" ) // StartVM Boots a VM if it does not exist @@ -104,7 +106,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa tStart = time.Now() conf := o.getVMConfig(vm) - _, err = o.fcClient.CreateVM(ctx, conf) + resp, err := o.fcClient.CreateVM(ctx, conf) startVMMetric.MetricMap[metrics.FcCreateVM] = metrics.ToUS(time.Since(tStart)) if err != nil { return nil, nil, errors.Wrap(err, "failed to create the microVM in firecracker-containerd") @@ -214,7 +216,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa VMMStatePath: o.getSnapshotFile(vmID), WorkingSetPath: o.getWorkingSetFile(vmID), // FIXME (gh-807) - //InstanceSockAddr: resp.UPFSockPath, + InstanceSockAddr: resp.GetSocketPath(), } if err := o.memoryManager.RegisterVM(stateCfg); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") @@ -497,7 +499,19 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap conf.MemFilePath = snap.GetMemFilePath() conf.ContainerSnapshotPath = containerSnap.GetDevicePath() + if conf.MemBackend == nil { + conf.MemBackend = &proto.MemoryBackend{} + } + conf.MemBackend.BackendType = fileBackend + conf.MemBackend.BackendPath = snap.GetMemFilePath() + if o.GetUPFEnabled() { + conf.MemBackend.BackendType = uffdBackend + conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(vmID) + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to get UPF socket path for uffd backend") + } + if err := o.memoryManager.FetchState(vmID); err != nil { return nil, nil, err } diff --git a/memory/manager/manager.go b/memory/manager/manager.go index c1f9464aa..1c9a755fe 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -351,6 +351,35 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } +func (m *MemoryManager) GetUPFSockPath(vmID string) (string, error) { + logger := log.WithFields(log.Fields{"vmID": vmID}) + + logger.Debug("Get the path of firecracker unix domain socket") + + m.Lock() + + state, ok := m.instances[vmID] + if !ok { + m.Unlock() + logger.Error("VM not registered with the memory manager") + return "", errors.New("VM not registered with the memory manager") + } + + m.Unlock() + + if state.isActive { + logger.Error("Cannot get stats while VM is active") + return "", errors.New("Cannot get stats while VM is active") + } + + if !m.MetricsModeOn || !state.metricsModeOn { + logger.Error("Metrics mode is not on") + return "", errors.New("Metrics mode is not on") + } + + return m.instances[vmID].SnapshotStateCfg.InstanceSockAddr, nil +} + func getLazyHeaderStats(state *SnapshotState, functionName string) ([]string, []string) { header := []string{ "FuncName", From 11c9cbd42de53eaa4c46c900b8b13c8db5598e2d Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 11 Dec 2023 08:34:49 -0700 Subject: [PATCH 04/21] Clean up formatting Signed-off-by: char-1ee --- ctriface/iface.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index 498261a7e..cdb9d876f 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -64,8 +64,8 @@ type StartVMResponse struct { const ( testImageName = "ghcr.io/ease-lab/helloworld:var_workload" - fileBackend = "File" - uffdBackend = "Uffd" + fileBackend = "File" + uffdBackend = "Uffd" ) // StartVM Boots a VM if it does not exist @@ -208,14 +208,13 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa logger.Debug("Registering VM with the memory manager") stateCfg := manager.SnapshotStateCfg{ - VMID: vmID, - GuestMemPath: o.getMemoryFile(vmID), - BaseDir: o.getVMBaseDir(vmID), - GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024, - IsLazyMode: o.isLazyMode, - VMMStatePath: o.getSnapshotFile(vmID), - WorkingSetPath: o.getWorkingSetFile(vmID), - // FIXME (gh-807) + VMID: vmID, + GuestMemPath: o.getMemoryFile(vmID), + BaseDir: o.getVMBaseDir(vmID), + GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024, + IsLazyMode: o.isLazyMode, + VMMStatePath: o.getSnapshotFile(vmID), + WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: resp.GetSocketPath(), } if err := o.memoryManager.RegisterVM(stateCfg); err != nil { From 5c2fff53d1736afad38c59ac9fe50e944cf11189 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 11 Dec 2023 20:47:53 -0700 Subject: [PATCH 05/21] Update firecracker binaries Signed-off-by: char-1ee --- bin/containerd-shim-aws-firecracker | 4 ++-- bin/firecracker | 4 ++-- bin/firecracker-containerd | 4 ++-- bin/firecracker-ctr | 4 ++-- bin/jailer | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bin/containerd-shim-aws-firecracker b/bin/containerd-shim-aws-firecracker index 9aec8ccaa..78eb4aee3 100755 --- a/bin/containerd-shim-aws-firecracker +++ b/bin/containerd-shim-aws-firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4208028fa44c5897563f67b4f1c56efd1400d9a145826b2556773c9d1876bd93 -size 33776240 +oid sha256:1c911676297a111a8c4e7cafb3cc42f3d4c1153a314e0c1d45f5136a89a3eb86 +size 36382048 diff --git a/bin/firecracker b/bin/firecracker index 750f24cfe..9fd1dedfc 100755 --- a/bin/firecracker +++ b/bin/firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8060c35d1669a57197985e4589b3e98f4a221b334c6d1f102aee62a3f77822cd -size 10012224 +oid sha256:d2774cdc3aaca482f52aea27ed1e51a35e02e68ce764fe87ed780c3a4e6c3513 +size 2455144 diff --git a/bin/firecracker-containerd b/bin/firecracker-containerd index 7cb01c83b..671f9f1bb 100755 --- a/bin/firecracker-containerd +++ b/bin/firecracker-containerd @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1959d901c4a8a6bdf8394628d5c62c0d6eba23a976acfdf8f9173fc21bc26e68 -size 69041344 +oid sha256:3dfa862ba43137ee5ccb6bec70cfa1cdecc21ea1fb94852b3701b69e3426fea3 +size 72480720 diff --git a/bin/firecracker-ctr b/bin/firecracker-ctr index 40d64e16f..9170bd8d3 100755 --- a/bin/firecracker-ctr +++ b/bin/firecracker-ctr @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfc28b8f8092d10190a4f7fc74cea36be12a9ef9f4ec81a3b7b46cde7d7ea857 -size 33034096 +oid sha256:4f457e142a05e438cc68e50020c2b29dc759a59c8865c0103eebe5e7138d279c +size 35333240 diff --git a/bin/jailer b/bin/jailer index 65bb271f0..95474dcdd 100755 --- a/bin/jailer +++ b/bin/jailer @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d57d145e171bde170d1218dfc6ea705097760926a568f79f9c09af52bd671c6b -size 3461320 +oid sha256:5306b2d86c85db70be5b063eb5f692b1c7bcd2ab822dbdee1bbd01a067cab129 +size 794328 From 0ffd48d3d672b7c60c96b5304c5267771cae1d98 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 18 Dec 2023 20:41:42 -0700 Subject: [PATCH 06/21] Refactor parameters Signed-off-by: char-1ee --- bin/containerd-shim-aws-firecracker | 4 ++-- bin/default-rootfs.img | 4 ++-- bin/firecracker | 4 ++-- bin/firecracker-containerd | 4 ++-- bin/firecracker-ctr | 4 ++-- bin/jailer | 4 ++-- ctriface/iface.go | 7 +++---- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/bin/containerd-shim-aws-firecracker b/bin/containerd-shim-aws-firecracker index 78eb4aee3..108a88b8e 100755 --- a/bin/containerd-shim-aws-firecracker +++ b/bin/containerd-shim-aws-firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c911676297a111a8c4e7cafb3cc42f3d4c1153a314e0c1d45f5136a89a3eb86 -size 36382048 +oid sha256:299c9623ed3262dede9e37aa52a76c84c3342cdcadc1571a7997c3c160ff9cc0 +size 36354520 diff --git a/bin/default-rootfs.img b/bin/default-rootfs.img index 36691dac7..1b2764cfd 100644 --- a/bin/default-rootfs.img +++ b/bin/default-rootfs.img @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7f5439985a3bc40e6257e3da089e85cc9235f59402f0982bba3587b89ec4afe -size 64577536 +oid sha256:ba09eb5181dd977c16af4a1890333dc61fccb01dc4a7c0b11ffc229863e5462e +size 73318400 diff --git a/bin/firecracker b/bin/firecracker index 9fd1dedfc..639384a49 100755 --- a/bin/firecracker +++ b/bin/firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2774cdc3aaca482f52aea27ed1e51a35e02e68ce764fe87ed780c3a4e6c3513 -size 2455144 +oid sha256:c44d9ea84a0ff0c5315ed0d3672494f77bafed6f6edaaf6b050a4b5e3425ebe1 +size 10012224 diff --git a/bin/firecracker-containerd b/bin/firecracker-containerd index 671f9f1bb..2334c5121 100755 --- a/bin/firecracker-containerd +++ b/bin/firecracker-containerd @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3dfa862ba43137ee5ccb6bec70cfa1cdecc21ea1fb94852b3701b69e3426fea3 -size 72480720 +oid sha256:c4450b3c8e9cb2db1a193cd2b9594eb90054cdbdae1d837a7f426b4b0d83950f +size 72445304 diff --git a/bin/firecracker-ctr b/bin/firecracker-ctr index 9170bd8d3..3bfb99c22 100755 --- a/bin/firecracker-ctr +++ b/bin/firecracker-ctr @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f457e142a05e438cc68e50020c2b29dc759a59c8865c0103eebe5e7138d279c -size 35333240 +oid sha256:37487275ed6a08f4e759d9847435537bf52f20c207087e3e4226e599de8bae72 +size 35276648 diff --git a/bin/jailer b/bin/jailer index 95474dcdd..65bb271f0 100755 --- a/bin/jailer +++ b/bin/jailer @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5306b2d86c85db70be5b063eb5f692b1c7bcd2ab822dbdee1bbd01a067cab129 -size 794328 +oid sha256:d57d145e171bde170d1218dfc6ea705097760926a568f79f9c09af52bd671c6b +size 3461320 diff --git a/ctriface/iface.go b/ctriface/iface.go index cdb9d876f..c339d93d8 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -498,11 +498,10 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap conf.MemFilePath = snap.GetMemFilePath() conf.ContainerSnapshotPath = containerSnap.GetDevicePath() - if conf.MemBackend == nil { - conf.MemBackend = &proto.MemoryBackend{} + conf.MemBackend = &proto.MemoryBackend{ + BackendType: fileBackend, + BackendPath: snap.GetMemFilePath(), } - conf.MemBackend.BackendType = fileBackend - conf.MemBackend.BackendPath = snap.GetMemFilePath() if o.GetUPFEnabled() { conf.MemBackend.BackendType = uffdBackend From 065cb9fd6b55aa606d7fa3c3340eb077908f1dda Mon Sep 17 00:00:00 2001 From: char-1ee Date: Tue, 19 Dec 2023 23:17:29 -0700 Subject: [PATCH 07/21] Fix mem_backend and mem_file_path exclusion Signed-off-by: char-1ee --- ctriface/iface.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index c339d93d8..b016cae91 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -495,14 +495,8 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap conf := o.getVMConfig(vm) conf.LoadSnapshot = true conf.SnapshotPath = snap.GetSnapshotFilePath() - conf.MemFilePath = snap.GetMemFilePath() conf.ContainerSnapshotPath = containerSnap.GetDevicePath() - conf.MemBackend = &proto.MemoryBackend{ - BackendType: fileBackend, - BackendPath: snap.GetMemFilePath(), - } - if o.GetUPFEnabled() { conf.MemBackend.BackendType = uffdBackend conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(vmID) @@ -513,6 +507,8 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap if err := o.memoryManager.FetchState(vmID); err != nil { return nil, nil, err } + } else { + conf.MemFilePath = snap.GetMemFilePath() } tStart = time.Now() From ea6061c950fcfd75dadca662d4e91c013a41be25 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Wed, 17 Jan 2024 23:17:54 -0700 Subject: [PATCH 08/21] Test --- Makefile | 5 + bin/containerd-shim-aws-firecracker | 4 +- bin/default-rootfs.img | 2 +- cri/firecracker/coordinator.go | 2 +- ctriface/failing_test.go | 2 +- ctriface/iface.go | 37 ++- ctriface/iface_test.go | 2 +- ctriface/manual_cleanup_test.go | 12 +- functions.go | 9 +- go.mod | 4 +- memory/manager/manager.go | 7 +- run.sh | 5 + vhive_test.go | 434 ++++++++++++++-------------- 13 files changed, 279 insertions(+), 246 deletions(-) create mode 100644 run.sh diff --git a/Makefile b/Makefile index 8ed8d005f..7793ed1e0 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,11 @@ test-all: test-subdirs test-orch test-orch: test test-man +debug: + ./scripts/clean_fcctr.sh + sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/fccd_orch_upf_log.out 2>$(CTRDLOGDIR)/fccd_orch_upf_log.err & + sudo env "PATH=$(PATH)" go test $(EXTRATESTFILES) -short $(EXTRAGOARGS) -args $(WITHSNAPSHOTS) $(WITHUPF) + test: ./scripts/clean_fcctr.sh sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/fccd_orch_noupf_log.out 2>$(CTRDLOGDIR)/fccd_orch_noupf_log.err & diff --git a/bin/containerd-shim-aws-firecracker b/bin/containerd-shim-aws-firecracker index 108a88b8e..84c3ba700 100755 --- a/bin/containerd-shim-aws-firecracker +++ b/bin/containerd-shim-aws-firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:299c9623ed3262dede9e37aa52a76c84c3342cdcadc1571a7997c3c160ff9cc0 -size 36354520 +oid sha256:9595863ca1a4903d7cd0715a5da046e1ce4d88c76bd58c9deef586b583bed79a +size 36354528 diff --git a/bin/default-rootfs.img b/bin/default-rootfs.img index 1b2764cfd..e2bfac4d5 100644 --- a/bin/default-rootfs.img +++ b/bin/default-rootfs.img @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba09eb5181dd977c16af4a1890333dc61fccb01dc4a7c0b11ffc229863e5462e +oid sha256:7a2e7504d702db942e88158547c1076db786f861fb0b69ec96eb27716e1e37b1 size 73318400 diff --git a/cri/firecracker/coordinator.go b/cri/firecracker/coordinator.go index 9acfe3d3c..b26389608 100644 --- a/cri/firecracker/coordinator.go +++ b/cri/firecracker/coordinator.go @@ -181,7 +181,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, snap) + resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, vmID, snap) if err != nil { logger.WithError(err).Error("failed to load VM") return nil, err diff --git a/ctriface/failing_test.go b/ctriface/failing_test.go index 079684066..9048e42e5 100644 --- a/ctriface/failing_test.go +++ b/ctriface/failing_test.go @@ -69,7 +69,7 @@ func TestStartSnapStop(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "", vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/ctriface/iface.go b/ctriface/iface.go index b016cae91..2e2892a33 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -217,6 +217,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: resp.GetSocketPath(), } + logger.Debugf("TEST: show socket path: %s", resp.GetSocketPath()) if err := o.memoryManager.RegisterVM(stateCfg); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") // NOTE (Plamen): Potentially need a defer(DeregisteVM) here if RegisterVM is not last to execute @@ -448,7 +449,7 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string, snap *sn } // LoadSnapshot Loads a snapshot of a VM -func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { +func (o *Orchestrator) LoadSnapshot(ctx context.Context, snapVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { var ( loadSnapshotMetric *metrics.Metric = metrics.NewMetric() tStart time.Time @@ -496,27 +497,33 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap conf.LoadSnapshot = true conf.SnapshotPath = snap.GetSnapshotFilePath() conf.ContainerSnapshotPath = containerSnap.GetDevicePath() + conf.MemBackend = &proto.MemoryBackend { + BackendType: fileBackend, + BackendPath: snap.GetMemFilePath(), + } if o.GetUPFEnabled() { + logger.Debug("TEST: UPF is enabled") conf.MemBackend.BackendType = uffdBackend - conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(vmID) + conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(snapVmID) + logger.Debugf("TEST: the upf socket: %s", conf.MemBackend.BackendPath) if err != nil { return nil, nil, errors.Wrapf(err, "failed to get UPF socket path for uffd backend") } - if err := o.memoryManager.FetchState(vmID); err != nil { + if err := o.memoryManager.FetchState(snapVmID); err != nil { return nil, nil, err } - } else { - conf.MemFilePath = snap.GetMemFilePath() } tStart = time.Now() + newUPFSockPath := "" go func() { defer close(loadDone) - if _, loadErr = o.fcClient.CreateVM(ctx, conf); loadErr != nil { + resp, loadErr := o.fcClient.CreateVM(ctx, conf) + if loadErr != nil { logger.Error("Failed to load snapshot of the VM: ", loadErr) logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) files, err := os.ReadDir(filepath.Dir(snap.GetSnapshotFilePath())) @@ -542,9 +549,27 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snap } logger.Error(snapFiles) } + newUPFSockPath = resp.GetSocketPath() }() + logger.Debug("TEST: CreatVM request sent") if o.GetUPFEnabled() { + + logger.Debug("TEST: Registering VM with the memory manager") + + stateCfg := manager.SnapshotStateCfg{ + VMID: vmID, + GuestMemPath: o.getMemoryFile(vmID), + BaseDir: o.getVMBaseDir(vmID), + GuestMemSize: int(conf.MachineCfg.MemSizeMib) * 1024 * 1024, + IsLazyMode: o.isLazyMode, + VMMStatePath: o.getSnapshotFile(vmID), + WorkingSetPath: o.getWorkingSetFile(vmID), + InstanceSockAddr: newUPFSockPath, + } + if err := o.memoryManager.RegisterVM(stateCfg); err != nil { + logger.Error(err, "failed to register new VM with memory manager") + } if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 2f1526c9a..c41a108fb 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -59,7 +59,7 @@ func TestPauseSnapResume(t *testing.T) { log.SetOutput(os.Stdout) - log.SetLevel(log.InfoLevel) + log.SetLevel(log.DebugLevel) testTimeout := 120 * time.Second ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index 85ff312c1..21f57aaae 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -91,7 +91,7 @@ func TestSnapLoad(t *testing.T) { vmID = "2" - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -152,7 +152,7 @@ func TestSnapLoadMultiple(t *testing.T) { vmID = "4" - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -163,7 +163,7 @@ func TestSnapLoadMultiple(t *testing.T) { vmID = "5" - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -237,7 +237,7 @@ func TestParallelSnapLoad(t *testing.T) { vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) _, err = orch.ResumeVM(ctx, vmID) @@ -359,7 +359,7 @@ func TestParallelPhasedSnapLoad(t *testing.T) { snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err := orch.LoadSnapshot(ctx, vmID, snap) + _, _, err := orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) }(i) } @@ -482,7 +482,7 @@ func TestRemoteSnapLoad(t *testing.T) { snap := snapshotting.NewSnapshot(revision, remoteSnapshotsDir, testImageName) - _, _, err = orch.LoadSnapshot(ctx, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) require.NoError(t, err, "Failed to load remote snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/functions.go b/functions.go index 91146d34a..28c527917 100644 --- a/functions.go +++ b/functions.go @@ -362,7 +362,10 @@ func (f *Function) AddInstance() *metrics.Metric { if f.isSnapshotReady { var resp *ctriface.StartVMResponse - resp, metr = f.LoadInstance(f.getVMID()) + // resp, metr = f.LoadInstance(f.getVMID()) + snapVmID := fmt.Sprintf("%s-%d", f.fID, f.lastInstanceID - 1) + currVmID := f.getVMID() + resp, metr = f.LoadInstance(snapVmID, currVmID) f.guestIP = resp.GuestIP f.vmID = f.getVMID() f.lastInstanceID++ @@ -479,7 +482,7 @@ func (f *Function) CreateInstanceSnapshot() { // LoadInstance Loads a new instance of the function from its snapshot and resumes it // The tap, the shim and the vmID remain the same -func (f *Function) LoadInstance(vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { +func (f *Function) LoadInstance(snapVmID string, vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { logger := log.WithFields(log.Fields{"fID": f.fID}) logger.Debug("Loading instance") @@ -492,7 +495,7 @@ func (f *Function) LoadInstance(vmID string) (*ctriface.StartVMResponse, *metric log.Panic(err) } - resp, loadMetr, err := orch.LoadSnapshot(ctx, vmID, snap) + resp, loadMetr, err := orch.LoadSnapshot(ctx, snapVmID, vmID, snap) if err != nil { log.Panic(err) } diff --git a/go.mod b/go.mod index cd66b4676..9aca89c1d 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,9 @@ replace ( ) replace ( - github.com/firecracker-microvm/firecracker-containerd => github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 + github.com/firecracker-microvm/firecracker-containerd => ../firecracker-containerd + + // github.com/firecracker-microvm/firecracker-containerd => github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 github.com/vhive-serverless/vhive/examples/protobuf/helloworld => ./examples/protobuf/helloworld ) diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 1c9a755fe..375222c1c 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -180,7 +180,7 @@ func (m *MemoryManager) FetchState(vmID string) error { state, ok = m.instances[vmID] if !ok { m.Unlock() - logger.Error("VM not registered with the memory manager") + logger.Error("TEST(fetch state): VM not registered with the memory manager") return errors.New("VM not registered with the memory manager") } @@ -372,11 +372,6 @@ func (m *MemoryManager) GetUPFSockPath(vmID string) (string, error) { return "", errors.New("Cannot get stats while VM is active") } - if !m.MetricsModeOn || !state.metricsModeOn { - logger.Error("Metrics mode is not on") - return "", errors.New("Metrics mode is not on") - } - return m.instances[vmID].SnapshotStateCfg.InstanceSockAddr, nil } diff --git a/run.sh b/run.sh new file mode 100644 index 000000000..dbae4c2cb --- /dev/null +++ b/run.sh @@ -0,0 +1,5 @@ +./scripts/clean_fcctr.sh +./scripts/cloudlab/setup_node.sh +go build -race -v -a ./... +make debug > output.log 2>&1 +code output.log diff --git a/vhive_test.go b/vhive_test.go index abf45e327..f76f543b6 100644 --- a/vhive_test.go +++ b/vhive_test.go @@ -26,8 +26,6 @@ import ( "context" "flag" "os" - "strconv" - "sync" "testing" ctrdlog "github.com/containerd/containerd/log" @@ -61,7 +59,7 @@ func TestMain(m *testing.M) { log.SetOutput(os.Stdout) - log.SetLevel(log.InfoLevel) + log.SetLevel(log.DebugLevel) flag.Parse() @@ -94,53 +92,53 @@ func TestMain(m *testing.M) { os.Exit(ret) } -func TestSendToFunctionSerial(t *testing.T) { - fID := "1" - var ( - servedTh uint64 - pinnedFuncNum int - ) - funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - for i := 0; i < 2; i++ { - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - if i == 0 { - require.Equal(t, resp.IsColdStart, true) - } - - require.Equal(t, resp.Payload, "Hello, world!") - } - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) -} - -func TestSendToFunctionParallel(t *testing.T) { - fID := "2" - var ( - servedTh uint64 - pinnedFuncNum int - ) - funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - var vmGroup sync.WaitGroup - for i := 0; i < 100; i++ { - vmGroup.Add(1) - - go func(i int) { - defer vmGroup.Done() - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - }(i) - - } - vmGroup.Wait() - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) -} +// func TestSendToFunctionSerial(t *testing.T) { +// fID := "1" +// var ( +// servedTh uint64 +// pinnedFuncNum int +// ) +// funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// for i := 0; i < 2; i++ { +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// if i == 0 { +// require.Equal(t, resp.IsColdStart, true) +// } + +// require.Equal(t, resp.Payload, "Hello, world!") +// } + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// } + +// func TestSendToFunctionParallel(t *testing.T) { +// fID := "2" +// var ( +// servedTh uint64 +// pinnedFuncNum int +// ) +// funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// var vmGroup sync.WaitGroup +// for i := 0; i < 100; i++ { +// vmGroup.Add(1) + +// go func(i int) { +// defer vmGroup.Done() +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") +// }(i) + +// } +// vmGroup.Wait() + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// } func TestStartSendStopTwice(t *testing.T) { fID := "3" @@ -167,171 +165,171 @@ func TestStartSendStopTwice(t *testing.T) { require.Equal(t, 2, int(startsGot), "Cold start (starts) stats are wrong") } -func TestStatsNotNumericFunction(t *testing.T) { - fID := "not-cld" - var ( - servedTh uint64 = 1 - pinnedFuncNum int = 2 - ) - funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) - - servedGot := funcPool.stats.statMap[fID].served - require.Equal(t, 1, int(servedGot), "Cold start (served) stats are wrong") - startsGot := funcPool.stats.statMap[fID].started - require.Equal(t, 1, int(startsGot), "Cold start (starts) stats are wrong") -} - -func TestStatsNotColdFunction(t *testing.T) { - fID := "4" - var ( - servedTh uint64 = 1 - pinnedFuncNum int = 4 - ) - funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) - - servedGot := funcPool.stats.statMap[fID].served - require.Equal(t, 1, int(servedGot), "Cold start (served) stats are wrong") - startsGot := funcPool.stats.statMap[fID].started - require.Equal(t, 1, int(startsGot), "Cold start (starts) stats are wrong") -} - -func TestSaveMemorySerial(t *testing.T) { - fID := "5" - var ( - servedTh uint64 = 40 - pinnedFuncNum int = 2 - ) - funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - for i := 0; i < 100; i++ { - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - } - - startsGot := funcPool.stats.statMap[fID].started - require.Equal(t, 3, int(startsGot), "Cold start (starts) stats are wrong") - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) -} - -func TestSaveMemoryParallel(t *testing.T) { - fID := "6" - var ( - servedTh uint64 = 40 - pinnedFuncNum int = 2 - ) - funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - var vmGroup sync.WaitGroup - for i := 0; i < 100; i++ { - vmGroup.Add(1) - - go func(i int) { - defer vmGroup.Done() - - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - }(i) - - } - vmGroup.Wait() - - startsGot := funcPool.stats.statMap[fID].started - require.Equal(t, 3, int(startsGot), "Cold start (starts) stats are wrong") - - message, err := funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) -} - -func TestDirectStartStopVM(t *testing.T) { - fID := "7" - var ( - servedTh uint64 - pinnedFuncNum int - ) - funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - message, err := funcPool.AddInstance(fID, testImageName) - require.NoError(t, err, "This error should never happen (addInstance())"+message) - - resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") - require.NoError(t, err, "Function returned error") - require.Equal(t, resp.Payload, "Hello, world!") - - message, err = funcPool.RemoveInstance(fID, testImageName, true) - require.NoError(t, err, "Function returned error, "+message) -} - -func TestAllFunctions(t *testing.T) { - - if testing.Short() { - t.Skip("skipping TestAllFunctions in non-nightly runs.") - } - - images := []string{ - "ghcr.io/ease-lab/helloworld:var_workload", - "ghcr.io/ease-lab/chameleon:var_workload", - "ghcr.io/ease-lab/pyaes:var_workload", - "ghcr.io/ease-lab/image_rotate:var_workload", - "ghcr.io/ease-lab/json_serdes:var_workload", - "ghcr.io/ease-lab/lr_serving:var_workload", - "ghcr.io/ease-lab/cnn_serving:var_workload", - "ghcr.io/ease-lab/rnn_serving:var_workload", - "ghcr.io/ease-lab/lr_training:var_workload", - "ghcr.io/ease-lab/springboot:var_workload", - } - var ( - servedTh uint64 - pinnedFuncNum int - ) - funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) - - for i := 0; i < 2; i++ { - var vmGroup sync.WaitGroup - for fID, imageName := range images { - reqs := []string{"world", "record", "replay"} - resps := []string{"world", "record_response", "replay_response"} - for k := 0; k < 3; k++ { - vmGroup.Add(1) - go func(fID int, imageName, request, response string) { - defer vmGroup.Done() - - resp, _, err := funcPool.Serve(context.Background(), strconv.Itoa(8+fID), imageName, request) - require.NoError(t, err, "Function returned error") - - require.Equal(t, resp.Payload, "Hello, "+response+"!") - }(fID, imageName, reqs[k], resps[k]) - } - vmGroup.Wait() - } - } - - var vmGroup sync.WaitGroup - for fID, imageName := range images { - vmGroup.Add(1) - go func(fID int, imageName string) { - defer vmGroup.Done() - - message, err := funcPool.RemoveInstance(strconv.Itoa(8+fID), imageName, true) - require.NoError(t, err, "Function returned error, "+message) - }(fID, imageName) - } - vmGroup.Wait() -} +// func TestStatsNotNumericFunction(t *testing.T) { +// fID := "not-cld" +// var ( +// servedTh uint64 = 1 +// pinnedFuncNum int = 2 +// ) +// funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) + +// servedGot := funcPool.stats.statMap[fID].served +// require.Equal(t, 1, int(servedGot), "Cold start (served) stats are wrong") +// startsGot := funcPool.stats.statMap[fID].started +// require.Equal(t, 1, int(startsGot), "Cold start (starts) stats are wrong") +// } + +// func TestStatsNotColdFunction(t *testing.T) { +// fID := "4" +// var ( +// servedTh uint64 = 1 +// pinnedFuncNum int = 4 +// ) +// funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) + +// servedGot := funcPool.stats.statMap[fID].served +// require.Equal(t, 1, int(servedGot), "Cold start (served) stats are wrong") +// startsGot := funcPool.stats.statMap[fID].started +// require.Equal(t, 1, int(startsGot), "Cold start (starts) stats are wrong") +// } + +// func TestSaveMemorySerial(t *testing.T) { +// fID := "5" +// var ( +// servedTh uint64 = 40 +// pinnedFuncNum int = 2 +// ) +// funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// for i := 0; i < 100; i++ { +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") +// } + +// startsGot := funcPool.stats.statMap[fID].started +// require.Equal(t, 3, int(startsGot), "Cold start (starts) stats are wrong") + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// } + +// func TestSaveMemoryParallel(t *testing.T) { +// fID := "6" +// var ( +// servedTh uint64 = 40 +// pinnedFuncNum int = 2 +// ) +// funcPool = NewFuncPool(isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// var vmGroup sync.WaitGroup +// for i := 0; i < 100; i++ { +// vmGroup.Add(1) + +// go func(i int) { +// defer vmGroup.Done() + +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") +// }(i) + +// } +// vmGroup.Wait() + +// startsGot := funcPool.stats.statMap[fID].started +// require.Equal(t, 3, int(startsGot), "Cold start (starts) stats are wrong") + +// message, err := funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// } + +// func TestDirectStartStopVM(t *testing.T) { +// fID := "7" +// var ( +// servedTh uint64 +// pinnedFuncNum int +// ) +// funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// message, err := funcPool.AddInstance(fID, testImageName) +// require.NoError(t, err, "This error should never happen (addInstance())"+message) + +// resp, _, err := funcPool.Serve(context.Background(), fID, testImageName, "world") +// require.NoError(t, err, "Function returned error") +// require.Equal(t, resp.Payload, "Hello, world!") + +// message, err = funcPool.RemoveInstance(fID, testImageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// } + +// func TestAllFunctions(t *testing.T) { + +// if testing.Short() { +// t.Skip("skipping TestAllFunctions in non-nightly runs.") +// } + +// images := []string{ +// "ghcr.io/ease-lab/helloworld:var_workload", +// "ghcr.io/ease-lab/chameleon:var_workload", +// "ghcr.io/ease-lab/pyaes:var_workload", +// "ghcr.io/ease-lab/image_rotate:var_workload", +// "ghcr.io/ease-lab/json_serdes:var_workload", +// "ghcr.io/ease-lab/lr_serving:var_workload", +// "ghcr.io/ease-lab/cnn_serving:var_workload", +// "ghcr.io/ease-lab/rnn_serving:var_workload", +// "ghcr.io/ease-lab/lr_training:var_workload", +// "ghcr.io/ease-lab/springboot:var_workload", +// } +// var ( +// servedTh uint64 +// pinnedFuncNum int +// ) +// funcPool = NewFuncPool(!isSaveMemoryConst, servedTh, pinnedFuncNum, isTestModeConst) + +// for i := 0; i < 2; i++ { +// var vmGroup sync.WaitGroup +// for fID, imageName := range images { +// reqs := []string{"world", "record", "replay"} +// resps := []string{"world", "record_response", "replay_response"} +// for k := 0; k < 3; k++ { +// vmGroup.Add(1) +// go func(fID int, imageName, request, response string) { +// defer vmGroup.Done() + +// resp, _, err := funcPool.Serve(context.Background(), strconv.Itoa(8+fID), imageName, request) +// require.NoError(t, err, "Function returned error") + +// require.Equal(t, resp.Payload, "Hello, "+response+"!") +// }(fID, imageName, reqs[k], resps[k]) +// } +// vmGroup.Wait() +// } +// } + +// var vmGroup sync.WaitGroup +// for fID, imageName := range images { +// vmGroup.Add(1) +// go func(fID int, imageName string) { +// defer vmGroup.Done() + +// message, err := funcPool.RemoveInstance(strconv.Itoa(8+fID), imageName, true) +// require.NoError(t, err, "Function returned error, "+message) +// }(fID, imageName) +// } +// vmGroup.Wait() +// } From 88ed5f76a86515dde5ea407efbcf82282e7163a8 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Wed, 24 Jan 2024 21:25:59 -0500 Subject: [PATCH 09/21] Test --- ctriface/iface.go | 5 +++++ memory/manager/manager.go | 1 + 2 files changed, 6 insertions(+) diff --git a/ctriface/iface.go b/ctriface/iface.go index 2e2892a33..6529d6cc1 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -207,6 +207,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa if o.GetUPFEnabled() { logger.Debug("Registering VM with the memory manager") + logger.Debugf("TEST (startWithEnv): current vmID used to registerVM is %v", vmID) stateCfg := manager.SnapshotStateCfg{ VMID: vmID, GuestMemPath: o.getMemoryFile(vmID), @@ -217,6 +218,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: resp.GetSocketPath(), } + logger.Debugf("TEST: show to-reg snapStat: %+v", stateCfg) logger.Debugf("TEST: show socket path: %s", resp.GetSocketPath()) if err := o.memoryManager.RegisterVM(stateCfg); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") @@ -570,6 +572,9 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, snapVmID string, vmID s if err := o.memoryManager.RegisterVM(stateCfg); err != nil { logger.Error(err, "failed to register new VM with memory manager") } + + + if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 375222c1c..2bb1766eb 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -129,6 +129,7 @@ func (m *MemoryManager) Activate(vmID string) error { m.Lock() + logger.Debug("TEST: Activate: fetch snapstate by vmID for UFFD") state, ok = m.instances[vmID] if !ok { m.Unlock() From 2ab905994161b4fbe89f5723f09829d6d4617172 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 29 Jan 2024 03:08:07 -0500 Subject: [PATCH 10/21] Test --- cri/firecracker/coordinator.go | 1 + ctriface/failing_test.go | 2 +- ctriface/iface.go | 11 +++++----- ctriface/manual_cleanup_test.go | 14 +++++++------ functions.go | 11 +++++----- memory/manager/manager.go | 37 +++++++++++++++++++++++++++++++-- 6 files changed, 56 insertions(+), 20 deletions(-) diff --git a/cri/firecracker/coordinator.go b/cri/firecracker/coordinator.go index b26389608..ff7879103 100644 --- a/cri/firecracker/coordinator.go +++ b/cri/firecracker/coordinator.go @@ -181,6 +181,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() + logger.Debug("FIXME: temp pass same lastVmID") resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, vmID, snap) if err != nil { logger.WithError(err).Error("failed to load VM") diff --git a/ctriface/failing_test.go b/ctriface/failing_test.go index 9048e42e5..d753c5df6 100644 --- a/ctriface/failing_test.go +++ b/ctriface/failing_test.go @@ -69,7 +69,7 @@ func TestStartSnapStop(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") - _, _, err = orch.LoadSnapshot(ctx, "", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/ctriface/iface.go b/ctriface/iface.go index 6529d6cc1..e39b0ad0a 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -220,7 +220,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa } logger.Debugf("TEST: show to-reg snapStat: %+v", stateCfg) logger.Debugf("TEST: show socket path: %s", resp.GetSocketPath()) - if err := o.memoryManager.RegisterVM(stateCfg); err != nil { + if err := o.memoryManager.RegisterVM(stateCfg, false, ""); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") // NOTE (Plamen): Potentially need a defer(DeregisteVM) here if RegisterVM is not last to execute } @@ -451,7 +451,7 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string, snap *sn } // LoadSnapshot Loads a snapshot of a VM -func (o *Orchestrator) LoadSnapshot(ctx context.Context, snapVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { +func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { var ( loadSnapshotMetric *metrics.Metric = metrics.NewMetric() tStart time.Time @@ -507,13 +507,13 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, snapVmID string, vmID s if o.GetUPFEnabled() { logger.Debug("TEST: UPF is enabled") conf.MemBackend.BackendType = uffdBackend - conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(snapVmID) + conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(lastVmID, true) logger.Debugf("TEST: the upf socket: %s", conf.MemBackend.BackendPath) if err != nil { return nil, nil, errors.Wrapf(err, "failed to get UPF socket path for uffd backend") } - if err := o.memoryManager.FetchState(snapVmID); err != nil { + if err := o.memoryManager.FetchState(lastVmID); err != nil { return nil, nil, err } } @@ -569,12 +569,11 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, snapVmID string, vmID s WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: newUPFSockPath, } - if err := o.memoryManager.RegisterVM(stateCfg); err != nil { + if err := o.memoryManager.RegisterVM(stateCfg, true, vmID); err != nil { logger.Error(err, "failed to register new VM with memory manager") } - if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index 21f57aaae..95488eb37 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -91,7 +91,7 @@ func TestSnapLoad(t *testing.T) { vmID = "2" - _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -152,7 +152,7 @@ func TestSnapLoadMultiple(t *testing.T) { vmID = "4" - _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "3", vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -163,7 +163,7 @@ func TestSnapLoadMultiple(t *testing.T) { vmID = "5" - _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "4", vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -234,10 +234,11 @@ func TestParallelSnapLoad(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to offload VM, "+vmID) + lastVmID := vmID vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, lastVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) _, err = orch.ResumeVM(ctx, vmID) @@ -357,9 +358,10 @@ func TestParallelPhasedSnapLoad(t *testing.T) { defer vmGroup.Done() vmID := fmt.Sprintf("%d", i+vmIDBase) snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + lastVmID := vmID vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err := orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err := orch.LoadSnapshot(ctx, lastVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) }(i) } @@ -482,7 +484,7 @@ func TestRemoteSnapLoad(t *testing.T) { snap := snapshotting.NewSnapshot(revision, remoteSnapshotsDir, testImageName) - _, _, err = orch.LoadSnapshot(ctx, vmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, "36", vmID, snap) require.NoError(t, err, "Failed to load remote snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/functions.go b/functions.go index 28c527917..28eadaf86 100644 --- a/functions.go +++ b/functions.go @@ -361,11 +361,12 @@ func (f *Function) AddInstance() *metrics.Metric { if f.isSnapshotReady { var resp *ctriface.StartVMResponse - // resp, metr = f.LoadInstance(f.getVMID()) - snapVmID := fmt.Sprintf("%s-%d", f.fID, f.lastInstanceID - 1) + + lastVmID := fmt.Sprintf("%s-%d", f.fID, f.lastInstanceID - 1) currVmID := f.getVMID() - resp, metr = f.LoadInstance(snapVmID, currVmID) + resp, metr = f.LoadInstance(lastVmID, currVmID) + f.guestIP = resp.GuestIP f.vmID = f.getVMID() f.lastInstanceID++ @@ -482,7 +483,7 @@ func (f *Function) CreateInstanceSnapshot() { // LoadInstance Loads a new instance of the function from its snapshot and resumes it // The tap, the shim and the vmID remain the same -func (f *Function) LoadInstance(snapVmID string, vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { +func (f *Function) LoadInstance(lastVmID string, vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { logger := log.WithFields(log.Fields{"fID": f.fID}) logger.Debug("Loading instance") @@ -495,7 +496,7 @@ func (f *Function) LoadInstance(snapVmID string, vmID string) (*ctriface.StartVM log.Panic(err) } - resp, loadMetr, err := orch.LoadSnapshot(ctx, snapVmID, vmID, snap) + resp, loadMetr, err := orch.LoadSnapshot(ctx, lastVmID, vmID, snap) if err != nil { log.Panic(err) } diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 2bb1766eb..8a5437785 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -53,6 +53,7 @@ type MemoryManager struct { sync.Mutex MemoryManagerCfg instances map[string]*SnapshotState // Indexed by vmID + origins map[string]string } // NewMemoryManager Initializes a new memory manager @@ -62,12 +63,13 @@ func NewMemoryManager(cfg MemoryManagerCfg) *MemoryManager { m := new(MemoryManager) m.instances = make(map[string]*SnapshotState) m.MemoryManagerCfg = cfg + m.origins = make(map[string]string) return m } // RegisterVM Registers a VM within the memory manager -func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg) error { +func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg, isSnapshotReady bool, originID string) error { m.Lock() defer m.Unlock() @@ -86,6 +88,10 @@ func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg) error { state := NewSnapshotState(cfg) m.instances[vmID] = state + // if isSnapshotReady { + // logger.Debugf("TEST: register current vmID %s with originID %s", vmID, originID) + // m.origins[vmID] = originID + // } return nil } @@ -130,7 +136,17 @@ func (m *MemoryManager) Activate(vmID string) error { m.Lock() logger.Debug("TEST: Activate: fetch snapstate by vmID for UFFD") + + // originID, ok := m.origins[vmID] + + // if !ok { + // logger.Debug("TEST: not loaded from snapshot") + // } + + // state, ok = m.instances[originID] + state, ok = m.instances[vmID] + if !ok { m.Unlock() logger.Error("VM not registered with the memory manager") @@ -178,6 +194,12 @@ func (m *MemoryManager) FetchState(vmID string) error { m.Lock() + // originID, ok := m.origins[vmID] + // if !ok { + // logger.Debug("TEST: not loaded from snapshot") + // } + // state, ok = m.instances[originID] + state, ok = m.instances[vmID] if !ok { m.Unlock() @@ -352,13 +374,24 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } -func (m *MemoryManager) GetUPFSockPath(vmID string) (string, error) { +func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { logger := log.WithFields(log.Fields{"vmID": vmID}) logger.Debug("Get the path of firecracker unix domain socket") m.Lock() + // id := "" + // if isSnapshotReady { + // logger.Debugf("TEST: to find originID by vmID %s", vmID) + // originID, ok := m.origins[vmID] + // if !ok { + // logger.Debug("TEST: not loaded from snapshot") + // } + // id = originID + // } + // state, ok := m.instances[id] + state, ok := m.instances[vmID] if !ok { m.Unlock() From 66c5f0c30a14418f0e008734d94c73b3df289167 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Thu, 1 Feb 2024 12:01:23 -0500 Subject: [PATCH 11/21] Update ctriface modue Signed-off-by: char-1ee --- cri/firecracker/coordinator.go | 6 +-- ctriface/Makefile | 6 +++ ctriface/failing_test.go | 79 +++++++++++++------------------ ctriface/iface.go | 29 +++++------- ctriface/iface_test.go | 50 ++++++++++++++++++++ ctriface/manual_cleanup_test.go | 8 ++-- ctriface/orch.go | 14 ++++++ functions.go | 13 ++--- memory/manager/manager.go | 81 +++++++++++++++----------------- memory/manager/snapshot_state.go | 25 ++++++++++ 10 files changed, 193 insertions(+), 118 deletions(-) diff --git a/cri/firecracker/coordinator.go b/cri/firecracker/coordinator.go index ff7879103..ec7ab2e1b 100644 --- a/cri/firecracker/coordinator.go +++ b/cri/firecracker/coordinator.go @@ -26,13 +26,14 @@ import ( "context" "errors" "fmt" - "github.com/google/uuid" - "github.com/vhive-serverless/vhive/snapshotting" "strconv" "sync" "sync/atomic" "time" + "github.com/google/uuid" + "github.com/vhive-serverless/vhive/snapshotting" + log "github.com/sirupsen/logrus" "github.com/vhive-serverless/vhive/ctriface" ) @@ -181,7 +182,6 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - logger.Debug("FIXME: temp pass same lastVmID") resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, vmID, snap) if err != nil { logger.WithError(err).Error("failed to load VM") diff --git a/ctriface/Makefile b/ctriface/Makefile index 6d2811be7..7b57901f1 100644 --- a/ctriface/Makefile +++ b/ctriface/Makefile @@ -28,6 +28,12 @@ WITHLAZY:=-lazy GOBENCH:=-v -timeout 1500s CTRDLOGDIR:=/tmp/ctrd-logs +debug: + ./../scripts/clean_fcctr.sh + sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/ctriface_log.out 2>$(CTRDLOGDIR)/ctriface_log.err & + sudo env "PATH=$(PATH)" go test $(EXTRATESTFILES) $(EXTRAGOARGS) -args $(WITHUPF) + ./../scripts/clean_fcctr.sh + test: ./../scripts/clean_fcctr.sh sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/ctriface_log.out 2>$(CTRDLOGDIR)/ctriface_log.err & diff --git a/ctriface/failing_test.go b/ctriface/failing_test.go index d753c5df6..36cb30a61 100644 --- a/ctriface/failing_test.go +++ b/ctriface/failing_test.go @@ -22,62 +22,49 @@ package ctriface -import ( - "context" - "os" - "testing" - "time" +// func TestStartSnapStop(t *testing.T) { +// // BROKEN BECAUSE StopVM does not work yet. +// // t.Skip("skipping failing test") +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging - ctrdlog "github.com/containerd/containerd/log" - "github.com/containerd/containerd/namespaces" - log "github.com/sirupsen/logrus" - "github.com/stretchr/testify/require" - "github.com/vhive-serverless/vhive/snapshotting" -) +// log.SetOutput(os.Stdout) -func TestStartSnapStop(t *testing.T) { - // BROKEN BECAUSE StopVM does not work yet. - t.Skip("skipping failing test") - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging +// log.SetLevel(log.DebugLevel) - log.SetOutput(os.Stdout) +// testTimeout := 120 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() - log.SetLevel(log.DebugLevel) +// orch := NewOrchestrator("devmapper", "", WithTestModeOn(true)) - testTimeout := 120 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() +// vmID := "2" - orch := NewOrchestrator("devmapper", "", WithTestModeOn(true)) +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM") - vmID := "2" +// err = orch.PauseVM(ctx, vmID) +// require.NoError(t, err, "Failed to pause VM") - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM") +// snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) +// err = orch.CreateSnapshot(ctx, vmID, snap) +// require.NoError(t, err, "Failed to create snapshot of VM") - err = orch.PauseVM(ctx, vmID) - require.NoError(t, err, "Failed to pause VM") +// err = orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") - snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) - err = orch.CreateSnapshot(ctx, vmID, snap) - require.NoError(t, err, "Failed to create snapshot of VM") +// _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) +// require.NoError(t, err, "Failed to load snapshot of VM") - err = orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") +// _, err = orch.ResumeVM(ctx, vmID) +// require.NoError(t, err, "Failed to resume VM") - _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) - require.NoError(t, err, "Failed to load snapshot of VM") +// err = orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") - _, err = orch.ResumeVM(ctx, vmID) - require.NoError(t, err, "Failed to resume VM") - - err = orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") - - _ = snap.Cleanup() - orch.Cleanup() -} +// _ = snap.Cleanup() +// orch.Cleanup() +// } diff --git a/ctriface/iface.go b/ctriface/iface.go index e39b0ad0a..ab66698ce 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -24,7 +24,6 @@ package ctriface import ( "context" - "github.com/vhive-serverless/vhive/snapshotting" "os" "os/exec" "path/filepath" @@ -33,6 +32,8 @@ import ( "syscall" "time" + "github.com/vhive-serverless/vhive/snapshotting" + log "github.com/sirupsen/logrus" "github.com/containerd/containerd" @@ -106,7 +107,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa tStart = time.Now() conf := o.getVMConfig(vm) - resp, err := o.fcClient.CreateVM(ctx, conf) + _, err = o.fcClient.CreateVM(ctx, conf) startVMMetric.MetricMap[metrics.FcCreateVM] = metrics.ToUS(time.Since(tStart)) if err != nil { return nil, nil, errors.Wrap(err, "failed to create the microVM in firecracker-containerd") @@ -216,11 +217,10 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa IsLazyMode: o.isLazyMode, VMMStatePath: o.getSnapshotFile(vmID), WorkingSetPath: o.getWorkingSetFile(vmID), - InstanceSockAddr: resp.GetSocketPath(), + InstanceSockAddr: o.uffdSockAddr, } logger.Debugf("TEST: show to-reg snapStat: %+v", stateCfg) - logger.Debugf("TEST: show socket path: %s", resp.GetSocketPath()) - if err := o.memoryManager.RegisterVM(stateCfg, false, ""); err != nil { + if err := o.memoryManager.RegisterVM(stateCfg); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") // NOTE (Plamen): Potentially need a defer(DeregisteVM) here if RegisterVM is not last to execute } @@ -451,7 +451,7 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string, snap *sn } // LoadSnapshot Loads a snapshot of a VM -func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { +func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID string, snap *snapshotting.Snapshot) (_ *StartVMResponse, _ *metrics.Metric, retErr error) { var ( loadSnapshotMetric *metrics.Metric = metrics.NewMetric() tStart time.Time @@ -499,7 +499,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID s conf.LoadSnapshot = true conf.SnapshotPath = snap.GetSnapshotFilePath() conf.ContainerSnapshotPath = containerSnap.GetDevicePath() - conf.MemBackend = &proto.MemoryBackend { + conf.MemBackend = &proto.MemoryBackend{ BackendType: fileBackend, BackendPath: snap.GetMemFilePath(), } @@ -507,24 +507,23 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID s if o.GetUPFEnabled() { logger.Debug("TEST: UPF is enabled") conf.MemBackend.BackendType = uffdBackend - conf.MemBackend.BackendPath, err = o.memoryManager.GetUPFSockPath(lastVmID, true) + conf.MemBackend.BackendPath = o.uffdSockAddr logger.Debugf("TEST: the upf socket: %s", conf.MemBackend.BackendPath) if err != nil { return nil, nil, errors.Wrapf(err, "failed to get UPF socket path for uffd backend") } - if err := o.memoryManager.FetchState(lastVmID); err != nil { + if err := o.memoryManager.FetchState(originVmID); err != nil { return nil, nil, err } } tStart = time.Now() - newUPFSockPath := "" go func() { defer close(loadDone) - resp, loadErr := o.fcClient.CreateVM(ctx, conf) + _, loadErr := o.fcClient.CreateVM(ctx, conf) if loadErr != nil { logger.Error("Failed to load snapshot of the VM: ", loadErr) logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) @@ -551,12 +550,11 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID s } logger.Error(snapFiles) } - newUPFSockPath = resp.GetSocketPath() }() logger.Debug("TEST: CreatVM request sent") if o.GetUPFEnabled() { - + logger.Debug("TEST: Registering VM with the memory manager") stateCfg := manager.SnapshotStateCfg{ @@ -567,13 +565,12 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, lastVmID string, vmID s IsLazyMode: o.isLazyMode, VMMStatePath: o.getSnapshotFile(vmID), WorkingSetPath: o.getWorkingSetFile(vmID), - InstanceSockAddr: newUPFSockPath, + InstanceSockAddr: o.uffdSockAddr, } - if err := o.memoryManager.RegisterVM(stateCfg, true, vmID); err != nil { + if err := o.memoryManager.RegisterVM(stateCfg); err != nil { logger.Error(err, "failed to register new VM with memory manager") } - if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index c41a108fb..ad6ffee57 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -50,6 +50,56 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// Test for ctriface uffd feature +func TestStartSnapStop(t *testing.T) { + // BROKEN BECAUSE StopVM does not work yet. + // t.Skip("skipping failing test") + log.SetFormatter(&log.TextFormatter{ + TimestampFormat: ctrdlog.RFC3339NanoFixed, + FullTimestamp: true, + }) + //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging + + log.SetOutput(os.Stdout) + + log.SetLevel(log.DebugLevel) + + testTimeout := 120 * time.Second + ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) + defer cancel() + + orch := NewOrchestrator("devmapper", "", WithTestModeOn(true)) + + vmID := "2" + + _, _, err := orch.StartVM(ctx, vmID, testImageName) + require.NoError(t, err, "Failed to start VM") + + err = orch.PauseVM(ctx, vmID) + require.NoError(t, err, "Failed to pause VM") + + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) + require.NoError(t, err, "Failed to create snapshot of VM") + + err = orch.StopSingleVM(ctx, vmID) + require.NoError(t, err, "Failed to stop VM") + + _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) + require.NoError(t, err, "Failed to load snapshot of VM") + + _, err = orch.ResumeVM(ctx, vmID) + require.NoError(t, err, "Failed to resume VM") + + time.Sleep(30 * time.Second) + + err = orch.StopSingleVM(ctx, vmID) + require.NoError(t, err, "Failed to stop VM") + + _ = snap.Cleanup() + orch.Cleanup() +} + func TestPauseSnapResume(t *testing.T) { log.SetFormatter(&log.TextFormatter{ TimestampFormat: ctrdlog.RFC3339NanoFixed, diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index 95488eb37..0536ea6bd 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -234,11 +234,11 @@ func TestParallelSnapLoad(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to offload VM, "+vmID) - lastVmID := vmID + originVmID := vmID vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err = orch.LoadSnapshot(ctx, lastVmID, vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) _, err = orch.ResumeVM(ctx, vmID) @@ -358,10 +358,10 @@ func TestParallelPhasedSnapLoad(t *testing.T) { defer vmGroup.Done() vmID := fmt.Sprintf("%d", i+vmIDBase) snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) - lastVmID := vmID + originVmID := vmID vmIDInt, _ := strconv.Atoi(vmID) vmID = strconv.Itoa(vmIDInt + 1) - _, _, err := orch.LoadSnapshot(ctx, lastVmID, vmID, snap) + _, _, err := orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) }(i) } diff --git a/ctriface/orch.go b/ctriface/orch.go index d7dc5df7b..e481b7296 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -88,6 +88,7 @@ type Orchestrator struct { isUPFEnabled bool isLazyMode bool snapshotsDir string + uffdSockAddr string isMetricsMode bool netPoolSize int @@ -121,8 +122,16 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) } if o.GetUPFEnabled() { + // o.uffdSockAddr = "/tmp/uffd.sock" // "/tmp/uffd/firecracker-containerd#3-0/uffd.sock" + _, err = os.Create("/tmp/uffd.sock") + + if err != nil { + log.Fatal("TEST: failed to create uffd sock", err) + } + managerCfg := manager.MemoryManagerCfg{ MetricsModeOn: o.isMetricsMode, + UffdSockAddr: o.uffdSockAddr, } o.memoryManager = manager.NewMemoryManager(managerCfg) } @@ -208,6 +217,11 @@ func (o *Orchestrator) GetSnapshotsDir() string { return o.snapshotsDir } +// TODO: /tmp/uffd/firecracker-containerd#3-0/uffd.sock +func (o *Orchestrator) getUffdSockAddr(vmID string) string { + return filepath.Join(o.getVMBaseDir(vmID), "uffd.sock") +} + func (o *Orchestrator) getSnapshotFile(vmID string) string { return filepath.Join(o.getVMBaseDir(vmID), "snap_file") } diff --git a/functions.go b/functions.go index 28eadaf86..290815a84 100644 --- a/functions.go +++ b/functions.go @@ -25,7 +25,6 @@ package main import ( "context" "fmt" - "github.com/vhive-serverless/vhive/ctriface" "math/rand" "net" "os" @@ -35,6 +34,8 @@ import ( "syscall" "time" + "github.com/vhive-serverless/vhive/ctriface" + "golang.org/x/sync/semaphore" "google.golang.org/grpc" "google.golang.org/grpc/backoff" @@ -362,10 +363,10 @@ func (f *Function) AddInstance() *metrics.Metric { if f.isSnapshotReady { var resp *ctriface.StartVMResponse // resp, metr = f.LoadInstance(f.getVMID()) - - lastVmID := fmt.Sprintf("%s-%d", f.fID, f.lastInstanceID - 1) + + originVmID := fmt.Sprintf("%s-%d", f.fID, f.lastInstanceID-1) currVmID := f.getVMID() - resp, metr = f.LoadInstance(lastVmID, currVmID) + resp, metr = f.LoadInstance(originVmID, currVmID) f.guestIP = resp.GuestIP f.vmID = f.getVMID() @@ -483,7 +484,7 @@ func (f *Function) CreateInstanceSnapshot() { // LoadInstance Loads a new instance of the function from its snapshot and resumes it // The tap, the shim and the vmID remain the same -func (f *Function) LoadInstance(lastVmID string, vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { +func (f *Function) LoadInstance(originVmID string, vmID string) (*ctriface.StartVMResponse, *metrics.Metric) { logger := log.WithFields(log.Fields{"fID": f.fID}) logger.Debug("Loading instance") @@ -496,7 +497,7 @@ func (f *Function) LoadInstance(lastVmID string, vmID string) (*ctriface.StartVM log.Panic(err) } - resp, loadMetr, err := orch.LoadSnapshot(ctx, lastVmID, vmID, snap) + resp, loadMetr, err := orch.LoadSnapshot(ctx, originVmID, vmID, snap) if err != nil { log.Panic(err) } diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 8a5437785..29595530a 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -46,6 +46,7 @@ const ( // MemoryManagerCfg Global config of the manager type MemoryManagerCfg struct { MetricsModeOn bool + UffdSockAddr string // it could not be appropriate to put sock here } // MemoryManager Serves page faults coming from VMs @@ -53,7 +54,6 @@ type MemoryManager struct { sync.Mutex MemoryManagerCfg instances map[string]*SnapshotState // Indexed by vmID - origins map[string]string } // NewMemoryManager Initializes a new memory manager @@ -63,13 +63,12 @@ func NewMemoryManager(cfg MemoryManagerCfg) *MemoryManager { m := new(MemoryManager) m.instances = make(map[string]*SnapshotState) m.MemoryManagerCfg = cfg - m.origins = make(map[string]string) - + return m } // RegisterVM Registers a VM within the memory manager -func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg, isSnapshotReady bool, originID string) error { +func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg) error { m.Lock() defer m.Unlock() @@ -88,11 +87,6 @@ func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg, isSnapshotReady bool, o state := NewSnapshotState(cfg) m.instances[vmID] = state - // if isSnapshotReady { - // logger.Debugf("TEST: register current vmID %s with originID %s", vmID, originID) - // m.origins[vmID] = originID - // } - return nil } @@ -374,40 +368,41 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } -func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { - logger := log.WithFields(log.Fields{"vmID": vmID}) - - logger.Debug("Get the path of firecracker unix domain socket") - - m.Lock() - - // id := "" - // if isSnapshotReady { - // logger.Debugf("TEST: to find originID by vmID %s", vmID) - // originID, ok := m.origins[vmID] - // if !ok { - // logger.Debug("TEST: not loaded from snapshot") - // } - // id = originID - // } - // state, ok := m.instances[id] - - state, ok := m.instances[vmID] - if !ok { - m.Unlock() - logger.Error("VM not registered with the memory manager") - return "", errors.New("VM not registered with the memory manager") - } - - m.Unlock() - - if state.isActive { - logger.Error("Cannot get stats while VM is active") - return "", errors.New("Cannot get stats while VM is active") - } - - return m.instances[vmID].SnapshotStateCfg.InstanceSockAddr, nil -} +// Deprecated +// func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { +// logger := log.WithFields(log.Fields{"vmID": vmID}) + +// logger.Debug("Get the path of firecracker unix domain socket") + +// m.Lock() + +// // id := "" +// // if isSnapshotReady { +// // logger.Debugf("TEST: to find originID by vmID %s", vmID) +// // originID, ok := m.origins[vmID] +// // if !ok { +// // logger.Debug("TEST: not loaded from snapshot") +// // } +// // id = originID +// // } +// // state, ok := m.instances[id] + +// state, ok := m.instances[vmID] +// if !ok { +// m.Unlock() +// logger.Error("VM not registered with the memory manager") +// return "", errors.New("VM not registered with the memory manager") +// } + +// m.Unlock() + +// if state.isActive { +// logger.Error("Cannot get stats while VM is active") +// return "", errors.New("Cannot get stats while VM is active") +// } + +// return m.instances[vmID].SnapshotStateCfg.InstanceSockAddr, nil +// } func getLazyHeaderStats(state *SnapshotState, functionName string) ([]string, []string) { header := []string{ diff --git a/memory/manager/snapshot_state.go b/memory/manager/snapshot_state.go index 7cce3e2af..ad23d2e97 100644 --- a/memory/manager/snapshot_state.go +++ b/memory/manager/snapshot_state.go @@ -49,6 +49,14 @@ import ( "unsafe" ) +// TODO: for test logging +type TestPageFault struct { + src uint64 + dst uint64 + mode uint64 + offset uint64 +} + // SnapshotStateCfg Config to initialize SnapshotState type SnapshotStateCfg struct { VMID string @@ -140,6 +148,7 @@ func (s *SnapshotState) getUFFD() error { time.Sleep(1 * time.Millisecond) continue } + log.Debugf("TEST: Dial uffd socket done: %s", s.InstanceSockAddr) defer c.Close() @@ -301,6 +310,7 @@ func (s *SnapshotState) pollUserPageFaults(readyCh chan int) { panic("Wrong number of events") } + logger.Debugf("TEST: epoller found %d event", nevents) for i := 0; i < nevents; i++ { event := events[i] @@ -374,14 +384,21 @@ func (s *SnapshotState) servePageFault(fd int, address uint64) error { workingSetInstalled bool ) + // log.SetOutput(os.Stdout) + // log.SetLevel(log.DebugLevel) + + log.Debugf("TEST: servePageFault(fd: %d, address: %d)", fd, address) + s.firstPageFaultOnce.Do( func() { s.startAddress = address + log.Debugf("TEST: first page fault address %d", address) if s.isRecordReady && !s.IsLazyMode { if s.metricsModeOn { tStart = time.Now() } + log.Debug("TEST: first page fault once installation") s.installWorkingSetPages(fd) if s.metricsModeOn { s.currentMetric.MetricMap[installWSMetric] = metrics.ToUS(time.Since(tStart)) @@ -401,6 +418,13 @@ func (s *SnapshotState) servePageFault(fd int, address uint64) error { dst := uint64(int64(address) & ^(int64(os.Getpagesize()) - 1)) mode := uint64(0) + testPF := TestPageFault{ + src: src, + dst: dst, + mode: mode, + offset: offset, + } + rec := Record{ offset: offset, } @@ -427,6 +451,7 @@ func (s *SnapshotState) servePageFault(fd int, address uint64) error { tStart = time.Now() } + log.Debugf("TEST: install happen for %v", testPF) err := installRegion(fd, src, dst, mode, 1) if s.metricsModeOn { From 58a7040753c5cdff1e1223ff285f6adc4ca89110 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 5 Feb 2024 02:11:52 -0700 Subject: [PATCH 12/21] Refactor Signed-off-by: char-1ee --- cri/firecracker/coordinator.go | 3 ++- ctriface/iface.go | 7 +++---- ctriface/iface_test.go | 9 +++++---- ctriface/manual_cleanup_test.go | 3 ++- memory/manager/manager.go | 27 +++++++++++++++++++++++++++ memory/manager/snapshot_state.go | 10 ++++++++++ 6 files changed, 49 insertions(+), 10 deletions(-) diff --git a/cri/firecracker/coordinator.go b/cri/firecracker/coordinator.go index ec7ab2e1b..a8e5c6c60 100644 --- a/cri/firecracker/coordinator.go +++ b/cri/firecracker/coordinator.go @@ -170,6 +170,7 @@ func (c *coordinator) orchStartVM(ctx context.Context, image, revision string, e func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.Snapshot) (*funcInstance, error) { vmID := c.getVMID() + originVmID := vmID logger := log.WithFields( log.Fields{ "vmID": vmID, @@ -182,7 +183,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - resp, _, err := c.orch.LoadSnapshot(ctxTimeout, vmID, vmID, snap) + resp, _, err := c.orch.LoadSnapshot(ctxTimeout, originVmID, vmID, snap) if err != nil { logger.WithError(err).Error("failed to load VM") return nil, err diff --git a/ctriface/iface.go b/ctriface/iface.go index ab66698ce..4a335c9ef 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -219,7 +219,7 @@ func (o *Orchestrator) StartVMWithEnvironment(ctx context.Context, vmID, imageNa WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: o.uffdSockAddr, } - logger.Debugf("TEST: show to-reg snapStat: %+v", stateCfg) + logger.Debugf("TEST: show snapStat to be registered: %+v", stateCfg) if err := o.memoryManager.RegisterVM(stateCfg); err != nil { return nil, nil, errors.Wrap(err, "failed to register VM with memory manager") // NOTE (Plamen): Potentially need a defer(DeregisteVM) here if RegisterVM is not last to execute @@ -523,8 +523,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID go func() { defer close(loadDone) - _, loadErr := o.fcClient.CreateVM(ctx, conf) - if loadErr != nil { + if _, loadErr := o.fcClient.CreateVM(ctx, conf); loadErr != nil { logger.Error("Failed to load snapshot of the VM: ", loadErr) logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) files, err := os.ReadDir(filepath.Dir(snap.GetSnapshotFilePath())) @@ -567,7 +566,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID WorkingSetPath: o.getWorkingSetFile(vmID), InstanceSockAddr: o.uffdSockAddr, } - if err := o.memoryManager.RegisterVM(stateCfg); err != nil { + if err := o.memoryManager.RegisterVMFromSnap(originVmID, stateCfg); err != nil { logger.Error(err, "failed to register new VM with memory manager") } diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index ad6ffee57..5f92a7d3f 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -25,7 +25,7 @@ import ( "context" "flag" "fmt" - "os" + "os" "sync" "testing" "time" @@ -71,6 +71,7 @@ func TestStartSnapStop(t *testing.T) { orch := NewOrchestrator("devmapper", "", WithTestModeOn(true)) vmID := "2" + newVmID := "3" _, _, err := orch.StartVM(ctx, vmID, testImageName) require.NoError(t, err, "Failed to start VM") @@ -85,15 +86,15 @@ func TestStartSnapStop(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") - _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, vmID, newVmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") - _, err = orch.ResumeVM(ctx, vmID) + _, err = orch.ResumeVM(ctx, newVmID) require.NoError(t, err, "Failed to resume VM") time.Sleep(30 * time.Second) - err = orch.StopSingleVM(ctx, vmID) + err = orch.StopSingleVM(ctx, newVmID) require.NoError(t, err, "Failed to stop VM") _ = snap.Cleanup() diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index 0536ea6bd..62fbfe1a4 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -90,8 +90,9 @@ func TestSnapLoad(t *testing.T) { require.NoError(t, err, "Failed to offload VM") vmID = "2" + originVmID := "1" - _, _, err = orch.LoadSnapshot(ctx, "1", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 29595530a..910077b49 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -54,6 +54,7 @@ type MemoryManager struct { sync.Mutex MemoryManagerCfg instances map[string]*SnapshotState // Indexed by vmID + origins map[string]string // Track parent vm for vm loaded from snapshot } // NewMemoryManager Initializes a new memory manager @@ -90,6 +91,31 @@ func (m *MemoryManager) RegisterVM(cfg SnapshotStateCfg) error { return nil } +// RegisterVMFromSnap Registers a VM that is loaded from snapshot within the memory manager +func (m *MemoryManager) RegisterVMFromSnap(originVmID string, cfg SnapshotStateCfg) error { + m.Lock() + defer m.Unlock() + + vmID := cfg.VMID + + logger := log.WithFields(log.Fields{"vmID": vmID}) + + logger.Debug("Registering the VM that loaded snapshot with the memory manager") + + if _, ok := m.instances[vmID]; ok { + logger.Error("VM already registered with the memory manager") + return errors.New("VM already registered with the memory manager") + } + + cfg.metricsModeOn = m.MetricsModeOn + state := NewSnapshotState(cfg) + // state := m.instances[originVmID] + + m.origins[vmID] = originVmID + m.instances[vmID] = state + return nil +} + // DeregisterVM Deregisters a VM from the memory manager func (m *MemoryManager) DeregisterVM(vmID string) error { m.Lock() @@ -111,6 +137,7 @@ func (m *MemoryManager) DeregisterVM(vmID string) error { } delete(m.instances, vmID) + delete(m.origins, vmID) return nil } diff --git a/memory/manager/snapshot_state.go b/memory/manager/snapshot_state.go index ad23d2e97..98fc14117 100644 --- a/memory/manager/snapshot_state.go +++ b/memory/manager/snapshot_state.go @@ -288,7 +288,17 @@ func (s *SnapshotState) pollUserPageFaults(readyCh chan int) { logger.Fatalf("register_epoller: %v", err) } + // TODO: config where the logger stream goes logger.Debug("Starting polling loop") + fmt.Printf("Starting polling loop") + + logFile, err := os.OpenFile("pg_happen.log", os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0666) + if err != nil { + log.Fatalf("error opening file: %v", err) + } + defer logFile.Close() + log.SetOutput(logFile) + log.Println("This is a test log entry") defer syscall.Close(s.epfd) From 1082679d0de9da8104fc09b33c9450d3aedb3a39 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 5 Feb 2024 02:29:20 -0700 Subject: [PATCH 13/21] Refactor the uffd socket path Signed-off-by: char-1ee --- ctriface/iface_test.go | 3 ++- ctriface/orch.go | 5 ++--- ctriface/orch_options.go | 7 +++++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 5f92a7d3f..30b680204 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -68,7 +68,8 @@ func TestStartSnapStop(t *testing.T) { ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) defer cancel() - orch := NewOrchestrator("devmapper", "", WithTestModeOn(true)) + uffdSockAddr := "/tmp/uffd.sock" + orch := NewOrchestrator("devmapper", "", WithTestModeOn(true), WithUffdSockAddr(uffdSockAddr)) vmID := "2" newVmID := "3" diff --git a/ctriface/orch.go b/ctriface/orch.go index e481b7296..43b2dc233 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -122,9 +122,8 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) } if o.GetUPFEnabled() { - // o.uffdSockAddr = "/tmp/uffd.sock" // "/tmp/uffd/firecracker-containerd#3-0/uffd.sock" - _, err = os.Create("/tmp/uffd.sock") - + // o.uffdSockAddr = "/tmp/uffd/firecracker-containerd#3-0/uffd.sock" + _, err = os.Create(o.uffdSockAddr) if err != nil { log.Fatal("TEST: failed to create uffd sock", err) } diff --git a/ctriface/orch_options.go b/ctriface/orch_options.go index 8e9896d5f..0446d945d 100644 --- a/ctriface/orch_options.go +++ b/ctriface/orch_options.go @@ -49,6 +49,13 @@ func WithUPF(isUPFEnabled bool) OrchestratorOption { } } +// WithUffdSockAddr Sets the socket path for Uffd communication +func WithUffdSockAddr(uffdSockAddr string) OrchestratorOption { + return func(o *Orchestrator) { + o.uffdSockAddr = uffdSockAddr + } +} + // WithSnapshotsDir Sets the directory where // snapshots should be stored func WithSnapshotsDir(snapshotsDir string) OrchestratorOption { From 6abf1987f44bd0738d8b525ee3cc8812734fb363 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 5 Feb 2024 04:00:24 -0700 Subject: [PATCH 14/21] Refactor Signed-off-by: char-1ee --- ctriface/iface_test.go | 10 ++++++---- ctriface/manual_cleanup_test.go | 11 +++++++---- ctriface/orch.go | 1 - memory/manager/manager.go | 2 ++ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 30b680204..47c377e88 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -72,7 +72,6 @@ func TestStartSnapStop(t *testing.T) { orch := NewOrchestrator("devmapper", "", WithTestModeOn(true), WithUffdSockAddr(uffdSockAddr)) vmID := "2" - newVmID := "3" _, _, err := orch.StartVM(ctx, vmID, testImageName) require.NoError(t, err, "Failed to start VM") @@ -87,15 +86,18 @@ func TestStartSnapStop(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") - _, _, err = orch.LoadSnapshot(ctx, vmID, newVmID, snap) + originVmID := vmID + vmID = "3" + + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") - _, err = orch.ResumeVM(ctx, newVmID) + _, err = orch.ResumeVM(ctx, vmID) require.NoError(t, err, "Failed to resume VM") time.Sleep(30 * time.Second) - err = orch.StopSingleVM(ctx, newVmID) + err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") _ = snap.Cleanup() diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index 62fbfe1a4..505247421 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -151,9 +151,10 @@ func TestSnapLoadMultiple(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to offload VM") + originVmID := vmID vmID = "4" - _, _, err = orch.LoadSnapshot(ctx, "3", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -162,9 +163,10 @@ func TestSnapLoadMultiple(t *testing.T) { err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to offload VM") + originVmID = vmID vmID = "5" - _, _, err = orch.LoadSnapshot(ctx, "4", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -469,8 +471,9 @@ func TestRemoteSnapLoad(t *testing.T) { ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) defer cancel() - vmID := "37" revision := "myrev-37" + originVmID := "37" + vmID := "38" _, err := os.Stat(remoteSnapshotsDir) require.NoError(t, err, "Failed to stat remote snapshots directory") @@ -485,7 +488,7 @@ func TestRemoteSnapLoad(t *testing.T) { snap := snapshotting.NewSnapshot(revision, remoteSnapshotsDir, testImageName) - _, _, err = orch.LoadSnapshot(ctx, "36", vmID, snap) + _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load remote snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/ctriface/orch.go b/ctriface/orch.go index 43b2dc233..121ddd2f8 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -122,7 +122,6 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) } if o.GetUPFEnabled() { - // o.uffdSockAddr = "/tmp/uffd/firecracker-containerd#3-0/uffd.sock" _, err = os.Create(o.uffdSockAddr) if err != nil { log.Fatal("TEST: failed to create uffd sock", err) diff --git a/memory/manager/manager.go b/memory/manager/manager.go index 910077b49..c51e251d4 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -55,6 +55,8 @@ type MemoryManager struct { MemoryManagerCfg instances map[string]*SnapshotState // Indexed by vmID origins map[string]string // Track parent vm for vm loaded from snapshot + + // TODO: snapshot and its children } // NewMemoryManager Initializes a new memory manager From 97f714e375ad4facf300c70943491861b7562051 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Tue, 20 Feb 2024 10:04:37 -0700 Subject: [PATCH 15/21] Test --- ctriface/iface.go | 11 +- ctriface/iface_test.go | 533 ++++++++++++++++--------------- ctriface/orch.go | 16 +- go.mod | 2 + go.sum | 2 - lg/uni_logger.go | 17 + memory/manager/manager.go | 11 +- memory/manager/snapshot_state.go | 11 +- 8 files changed, 318 insertions(+), 285 deletions(-) create mode 100644 lg/uni_logger.go diff --git a/ctriface/iface.go b/ctriface/iface.go index 4a335c9ef..298546742 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -24,6 +24,7 @@ package ctriface import ( "context" + "encoding/json" "os" "os/exec" "path/filepath" @@ -523,6 +524,9 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID go func() { defer close(loadDone) + confStr, _ := json.Marshal(conf) + logger.Debugf("TEST: CreateVM request: %s", confStr) + if _, loadErr := o.fcClient.CreateVM(ctx, conf); loadErr != nil { logger.Error("Failed to load snapshot of the VM: ", loadErr) logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) @@ -552,9 +556,11 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID }() logger.Debug("TEST: CreatVM request sent") + <-loadDone + if o.GetUPFEnabled() { - logger.Debug("TEST: Registering VM with the memory manager") + logger.Debug("TEST: Registering VM with snap with the memory manager") stateCfg := manager.SnapshotStateCfg{ VMID: vmID, @@ -570,12 +576,13 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID logger.Error(err, "failed to register new VM with memory manager") } + logger.Debug("TEST: activate VM in mm") if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } } - <-loadDone + // <-loadDone loadSnapshotMetric.MetricMap[metrics.LoadVMM] = metrics.ToUS(time.Since(tStart)) diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 47c377e88..677dc9d1e 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -24,9 +24,7 @@ package ctriface import ( "context" "flag" - "fmt" - "os" - "sync" + "os" "testing" "time" @@ -35,6 +33,8 @@ import ( log "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "github.com/vhive-serverless/vhive/snapshotting" + + "github.com/vhive-serverless/vhive/lg" ) // TODO: Make it impossible to use lazy mode without UPF @@ -68,8 +68,18 @@ func TestStartSnapStop(t *testing.T) { ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) defer cancel() + // uffdSockAddr := "/tmp/uffd.sock" + // uffdSockDir := "/home/char/uffd" + // uffdSockAddr := uffdSockDir + "/uffd.sock" uffdSockAddr := "/tmp/uffd.sock" - orch := NewOrchestrator("devmapper", "", WithTestModeOn(true), WithUffdSockAddr(uffdSockAddr)) + orch := NewOrchestrator( + "devmapper", + "", + WithTestModeOn(true), + WithSnapshots(true), + WithUPF(*isUPFEnabled), + WithUffdSockAddr(uffdSockAddr), + ) vmID := "2" @@ -83,6 +93,9 @@ func TestStartSnapStop(t *testing.T) { err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") + // _, err = orch.ResumeVM(ctx, vmID) + // require.NoError(t, err, "Failed to resume VM after created snapshot") + err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") @@ -92,6 +105,8 @@ func TestStartSnapStop(t *testing.T) { _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") + log.Debug("TEST: LoadSnapshot completed") + lg.UniLogger.Println("This is a test") _, err = orch.ResumeVM(ctx, vmID) require.NoError(t, err, "Failed to resume VM") @@ -104,275 +119,275 @@ func TestStartSnapStop(t *testing.T) { orch.Cleanup() } -func TestPauseSnapResume(t *testing.T) { - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging - - log.SetOutput(os.Stdout) +// func TestPauseSnapResume(t *testing.T) { +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging - log.SetLevel(log.DebugLevel) - - testTimeout := 120 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() - - orch := NewOrchestrator( - "devmapper", - "", - WithTestModeOn(true), - WithUPF(*isUPFEnabled), - WithLazyMode(*isLazyMode), - ) - - vmID := "4" - revision := "myrev-4" - - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM") - - err = orch.PauseVM(ctx, vmID) - require.NoError(t, err, "Failed to pause VM") - - snap := snapshotting.NewSnapshot(revision, "/fccd/snapshots", testImageName) - err = snap.CreateSnapDir() - require.NoError(t, err, "Failed to create snapshots directory") - - err = orch.CreateSnapshot(ctx, vmID, snap) - require.NoError(t, err, "Failed to create snapshot of VM") +// log.SetOutput(os.Stdout) - _, err = orch.ResumeVM(ctx, vmID) - require.NoError(t, err, "Failed to resume VM") +// log.SetLevel(log.DebugLevel) - err = orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") +// testTimeout := 120 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() - _ = snap.Cleanup() - orch.Cleanup() -} +// orch := NewOrchestrator( +// "devmapper", +// "", +// WithTestModeOn(true), +// WithUPF(*isUPFEnabled), +// WithLazyMode(*isLazyMode), +// ) -func TestStartStopSerial(t *testing.T) { - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging +// vmID := "4" +// revision := "myrev-4" - log.SetOutput(os.Stdout) +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM") - log.SetLevel(log.InfoLevel) +// err = orch.PauseVM(ctx, vmID) +// require.NoError(t, err, "Failed to pause VM") - testTimeout := 120 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() +// snap := snapshotting.NewSnapshot(revision, "/fccd/snapshots", testImageName) +// err = snap.CreateSnapDir() +// require.NoError(t, err, "Failed to create snapshots directory") - orch := NewOrchestrator( - "devmapper", - "", - WithTestModeOn(true), - WithUPF(*isUPFEnabled), - WithLazyMode(*isLazyMode), - ) +// err = orch.CreateSnapshot(ctx, vmID, snap) +// require.NoError(t, err, "Failed to create snapshot of VM") - vmID := "5" +// _, err = orch.ResumeVM(ctx, vmID) +// require.NoError(t, err, "Failed to resume VM") - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM") +// err = orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") - err = orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") +// _ = snap.Cleanup() +// orch.Cleanup() +// } - orch.Cleanup() -} +// func TestStartStopSerial(t *testing.T) { +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging -func TestPauseResumeSerial(t *testing.T) { - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging +// log.SetOutput(os.Stdout) - log.SetOutput(os.Stdout) +// log.SetLevel(log.InfoLevel) - log.SetLevel(log.InfoLevel) +// testTimeout := 120 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() - testTimeout := 120 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() - - orch := NewOrchestrator( - "devmapper", - "", - WithTestModeOn(true), - WithUPF(*isUPFEnabled), - WithLazyMode(*isLazyMode), - ) - - vmID := "6" - - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM") - - err = orch.PauseVM(ctx, vmID) - require.NoError(t, err, "Failed to pause VM") - - _, err = orch.ResumeVM(ctx, vmID) - require.NoError(t, err, "Failed to resume VM") - - err = orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") - - orch.Cleanup() -} - -func TestStartStopParallel(t *testing.T) { - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging - - log.SetOutput(os.Stdout) - - log.SetLevel(log.InfoLevel) - - testTimeout := 360 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() - - vmNum := 10 - vmIDBase := 7 - - orch := NewOrchestrator( - "devmapper", - "", - WithTestModeOn(true), - WithUPF(*isUPFEnabled), - WithLazyMode(*isLazyMode), - ) - - // Pull image - _, err := orch.getImage(ctx, testImageName) - require.NoError(t, err, "Failed to pull image "+testImageName) - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM "+vmID) - }(i) - } - vmGroup.Wait() - } - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - err := orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM "+vmID) - }(i) - } - vmGroup.Wait() - } - - orch.Cleanup() -} - -func TestPauseResumeParallel(t *testing.T) { - log.SetFormatter(&log.TextFormatter{ - TimestampFormat: ctrdlog.RFC3339NanoFixed, - FullTimestamp: true, - }) - //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging - - log.SetOutput(os.Stdout) - - log.SetLevel(log.InfoLevel) - - testTimeout := 120 * time.Second - ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) - defer cancel() - - vmNum := 10 - vmIDBase := 17 - - orch := NewOrchestrator( - "devmapper", - "", - WithTestModeOn(true), - WithUPF(*isUPFEnabled), - WithLazyMode(*isLazyMode), - ) - - // Pull image - _, err := orch.getImage(ctx, testImageName) - require.NoError(t, err, "Failed to pull image "+testImageName) - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - _, _, err := orch.StartVM(ctx, vmID, testImageName) - require.NoError(t, err, "Failed to start VM") - }(i) - } - vmGroup.Wait() - } - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - err := orch.PauseVM(ctx, vmID) - require.NoError(t, err, "Failed to pause VM") - }(i) - } - vmGroup.Wait() - } - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - _, err := orch.ResumeVM(ctx, vmID) - require.NoError(t, err, "Failed to resume VM") - }(i) - } - vmGroup.Wait() - } - - { - var vmGroup sync.WaitGroup - for i := vmIDBase; i < vmNum; i++ { - vmGroup.Add(1) - go func(i int) { - defer vmGroup.Done() - vmID := fmt.Sprintf("%d", i) - err := orch.StopSingleVM(ctx, vmID) - require.NoError(t, err, "Failed to stop VM") - }(i) - } - vmGroup.Wait() - } - - orch.Cleanup() -} +// orch := NewOrchestrator( +// "devmapper", +// "", +// WithTestModeOn(true), +// WithUPF(*isUPFEnabled), +// WithLazyMode(*isLazyMode), +// ) + +// vmID := "5" + +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM") + +// err = orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") + +// orch.Cleanup() +// } + +// func TestPauseResumeSerial(t *testing.T) { +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging + +// log.SetOutput(os.Stdout) + +// log.SetLevel(log.InfoLevel) + +// testTimeout := 120 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() + +// orch := NewOrchestrator( +// "devmapper", +// "", +// WithTestModeOn(true), +// WithUPF(*isUPFEnabled), +// WithLazyMode(*isLazyMode), +// ) + +// vmID := "6" + +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM") + +// err = orch.PauseVM(ctx, vmID) +// require.NoError(t, err, "Failed to pause VM") + +// _, err = orch.ResumeVM(ctx, vmID) +// require.NoError(t, err, "Failed to resume VM") + +// err = orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") + +// orch.Cleanup() +// } + +// func TestStartStopParallel(t *testing.T) { +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging + +// log.SetOutput(os.Stdout) + +// log.SetLevel(log.InfoLevel) + +// testTimeout := 360 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() + +// vmNum := 10 +// vmIDBase := 7 + +// orch := NewOrchestrator( +// "devmapper", +// "", +// WithTestModeOn(true), +// WithUPF(*isUPFEnabled), +// WithLazyMode(*isLazyMode), +// ) + +// // Pull image +// _, err := orch.getImage(ctx, testImageName) +// require.NoError(t, err, "Failed to pull image "+testImageName) + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM "+vmID) +// }(i) +// } +// vmGroup.Wait() +// } + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// err := orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM "+vmID) +// }(i) +// } +// vmGroup.Wait() +// } + +// orch.Cleanup() +// } + +// func TestPauseResumeParallel(t *testing.T) { +// log.SetFormatter(&log.TextFormatter{ +// TimestampFormat: ctrdlog.RFC3339NanoFixed, +// FullTimestamp: true, +// }) +// //log.SetReportCaller(true) // FIXME: make sure it's false unless debugging + +// log.SetOutput(os.Stdout) + +// log.SetLevel(log.InfoLevel) + +// testTimeout := 120 * time.Second +// ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) +// defer cancel() + +// vmNum := 10 +// vmIDBase := 17 + +// orch := NewOrchestrator( +// "devmapper", +// "", +// WithTestModeOn(true), +// WithUPF(*isUPFEnabled), +// WithLazyMode(*isLazyMode), +// ) + +// // Pull image +// _, err := orch.getImage(ctx, testImageName) +// require.NoError(t, err, "Failed to pull image "+testImageName) + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// _, _, err := orch.StartVM(ctx, vmID, testImageName) +// require.NoError(t, err, "Failed to start VM") +// }(i) +// } +// vmGroup.Wait() +// } + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// err := orch.PauseVM(ctx, vmID) +// require.NoError(t, err, "Failed to pause VM") +// }(i) +// } +// vmGroup.Wait() +// } + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// _, err := orch.ResumeVM(ctx, vmID) +// require.NoError(t, err, "Failed to resume VM") +// }(i) +// } +// vmGroup.Wait() +// } + +// { +// var vmGroup sync.WaitGroup +// for i := vmIDBase; i < vmNum; i++ { +// vmGroup.Add(1) +// go func(i int) { +// defer vmGroup.Done() +// vmID := fmt.Sprintf("%d", i) +// err := orch.StopSingleVM(ctx, vmID) +// require.NoError(t, err, "Failed to stop VM") +// }(i) +// } +// vmGroup.Wait() +// } + +// orch.Cleanup() +// } diff --git a/ctriface/orch.go b/ctriface/orch.go index 121ddd2f8..8cc9cc6de 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -23,7 +23,6 @@ package ctriface import ( - "github.com/vhive-serverless/vhive/devmapper" "os" "os/signal" "path/filepath" @@ -32,6 +31,8 @@ import ( "syscall" "time" + "github.com/vhive-serverless/vhive/devmapper" + log "github.com/sirupsen/logrus" "github.com/containerd/containerd" @@ -88,7 +89,7 @@ type Orchestrator struct { isUPFEnabled bool isLazyMode bool snapshotsDir string - uffdSockAddr string + uffdSockAddr string isMetricsMode bool netPoolSize int @@ -122,14 +123,15 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) } if o.GetUPFEnabled() { - _, err = os.Create(o.uffdSockAddr) + file, err := os.Create(o.uffdSockAddr) if err != nil { - log.Fatal("TEST: failed to create uffd sock", err) + log.Fatalf("Failed to create socket file: %v", err) } - + defer file.Close() + managerCfg := manager.MemoryManagerCfg{ MetricsModeOn: o.isMetricsMode, - UffdSockAddr: o.uffdSockAddr, + UffdSockAddr: o.uffdSockAddr, } o.memoryManager = manager.NewMemoryManager(managerCfg) } @@ -218,7 +220,7 @@ func (o *Orchestrator) GetSnapshotsDir() string { // TODO: /tmp/uffd/firecracker-containerd#3-0/uffd.sock func (o *Orchestrator) getUffdSockAddr(vmID string) string { return filepath.Join(o.getVMBaseDir(vmID), "uffd.sock") -} +} func (o *Orchestrator) getSnapshotFile(vmID string) string { return filepath.Join(o.getVMBaseDir(vmID), "snap_file") diff --git a/go.mod b/go.mod index 9aca89c1d..6f116b3d1 100644 --- a/go.mod +++ b/go.mod @@ -40,9 +40,11 @@ replace ( replace ( github.com/firecracker-microvm/firecracker-containerd => ../firecracker-containerd + github.com/vhive-serverless/vhive/ctriface => ./ctriface // github.com/firecracker-microvm/firecracker-containerd => github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 github.com/vhive-serverless/vhive/examples/protobuf/helloworld => ./examples/protobuf/helloworld + github.com/vhive-serverless/vhive/lg => ./lg ) require ( diff --git a/go.sum b/go.sum index fd6b5eec0..20ae4a93e 100644 --- a/go.sum +++ b/go.sum @@ -147,8 +147,6 @@ github.com/cespare/prettybench v0.0.0-20150116022406-03b8cfe5406c/go.mod h1:Xe6Z github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5/go.mod h1:/iP1qXHoty45bqomnu2LM+VVyAEdWN+vtSHGlQgyxbw= -github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134 h1:InrwKCxhDU1PJTNJ0wOHM/PvsIruaz2HriViJ5swrX4= -github.com/char-1ee/firecracker-containerd v0.0.0-20231018191519-49cac5eea134/go.mod h1:XC5a/4PWbzipD5Ron745odZxoVy/J6d8xFldwTZJbSU= github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b/go.mod h1:TrMrLQfeENAPYPRsJuq3jsqdlRh3lvi6trTZJG8+tho= github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= diff --git a/lg/uni_logger.go b/lg/uni_logger.go new file mode 100644 index 000000000..b8decb6a5 --- /dev/null +++ b/lg/uni_logger.go @@ -0,0 +1,17 @@ +package lg + +import ( + "log" + "os" + // log "github.com/sirupsen/logrus" +) + +var UniLogger *log.Logger + +func init() { + file, err := os.OpenFile("output.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + log.Fatalln("Failed to open log file:", err) + } + UniLogger = log.New(file, "DEBUG: ", log.Ldate|log.Ltime|log.Lshortfile) +} diff --git a/memory/manager/manager.go b/memory/manager/manager.go index c51e251d4..e48e1b39b 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -46,7 +46,7 @@ const ( // MemoryManagerCfg Global config of the manager type MemoryManagerCfg struct { MetricsModeOn bool - UffdSockAddr string // it could not be appropriate to put sock here + UffdSockAddr string // it could not be appropriate to put sock here } // MemoryManager Serves page faults coming from VMs @@ -54,9 +54,7 @@ type MemoryManager struct { sync.Mutex MemoryManagerCfg instances map[string]*SnapshotState // Indexed by vmID - origins map[string]string // Track parent vm for vm loaded from snapshot - - // TODO: snapshot and its children + origins map[string]string // Track parent vm for vm loaded from snapshot } // NewMemoryManager Initializes a new memory manager @@ -65,8 +63,9 @@ func NewMemoryManager(cfg MemoryManagerCfg) *MemoryManager { m := new(MemoryManager) m.instances = make(map[string]*SnapshotState) + m.origins = make(map[string]string) m.MemoryManagerCfg = cfg - + return m } @@ -397,7 +396,7 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } -// Deprecated +// Deprecated // func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { // logger := log.WithFields(log.Fields{"vmID": vmID}) diff --git a/memory/manager/snapshot_state.go b/memory/manager/snapshot_state.go index 98fc14117..34e041918 100644 --- a/memory/manager/snapshot_state.go +++ b/memory/manager/snapshot_state.go @@ -44,6 +44,7 @@ import ( log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/vhive-serverless/vhive/lg" "github.com/vhive-serverless/vhive/metrics" "unsafe" @@ -290,15 +291,7 @@ func (s *SnapshotState) pollUserPageFaults(readyCh chan int) { // TODO: config where the logger stream goes logger.Debug("Starting polling loop") - fmt.Printf("Starting polling loop") - - logFile, err := os.OpenFile("pg_happen.log", os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - log.Fatalf("error opening file: %v", err) - } - defer logFile.Close() - log.SetOutput(logFile) - log.Println("This is a test log entry") + lg.UniLogger.Println("Starting polling loop") defer syscall.Close(s.epfd) From e8092e40abb6c1780d098c7f78849945ab74f37b Mon Sep 17 00:00:00 2001 From: char-1ee Date: Thu, 22 Feb 2024 12:33:25 -0700 Subject: [PATCH 16/21] Add listener to uffd socket before epolling --- ctriface/iface_test.go | 1 - ctriface/orch.go | 3 +- memory/manager/manager.go | 72 ++++++++++++++++++++++++++++++++------- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index 677dc9d1e..d2b7c7e8b 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -68,7 +68,6 @@ func TestStartSnapStop(t *testing.T) { ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), namespaceName), testTimeout) defer cancel() - // uffdSockAddr := "/tmp/uffd.sock" // uffdSockDir := "/home/char/uffd" // uffdSockAddr := uffdSockDir + "/uffd.sock" uffdSockAddr := "/tmp/uffd.sock" diff --git a/ctriface/orch.go b/ctriface/orch.go index 8cc9cc6de..33394c4c2 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -128,12 +128,13 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) log.Fatalf("Failed to create socket file: %v", err) } defer file.Close() - + managerCfg := manager.MemoryManagerCfg{ MetricsModeOn: o.isMetricsMode, UffdSockAddr: o.uffdSockAddr, } o.memoryManager = manager.NewMemoryManager(managerCfg) + go o.memoryManager.ListenUffdSocket(o.uffdSockAddr) } log.Info("Creating containerd client") diff --git a/memory/manager/manager.go b/memory/manager/manager.go index e48e1b39b..b694abe8c 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -26,6 +26,7 @@ import ( "encoding/csv" "errors" "fmt" + "net" "os" "strconv" "sync" @@ -53,8 +54,10 @@ type MemoryManagerCfg struct { type MemoryManager struct { sync.Mutex MemoryManagerCfg - instances map[string]*SnapshotState // Indexed by vmID - origins map[string]string // Track parent vm for vm loaded from snapshot + instances map[string]*SnapshotState // Indexed by vmID + origins map[string]string // Track parent vm for vm loaded from snapshot + startEpollingCh chan struct{} + startEpollingOnce sync.Once } // NewMemoryManager Initializes a new memory manager @@ -64,7 +67,9 @@ func NewMemoryManager(cfg MemoryManagerCfg) *MemoryManager { m := new(MemoryManager) m.instances = make(map[string]*SnapshotState) m.origins = make(map[string]string) + m.startEpollingCh = make(chan struct{}, 1) m.MemoryManagerCfg = cfg + m.startEpollingOnce = sync.Once{} return m } @@ -182,21 +187,29 @@ func (m *MemoryManager) Activate(vmID string) error { return errors.New("VM already active") } - if err := state.mapGuestMemory(); err != nil { - logger.Error("Failed to map guest memory") - return err - } + select { + case <-m.startEpollingCh: + if err := state.mapGuestMemory(); err != nil { + logger.Error("Failed to map guest memory") + return err + } - if err := state.getUFFD(); err != nil { - logger.Error("Failed to get uffd") - return err - } + if err := state.getUFFD(); err != nil { + logger.Error("Failed to get uffd") + return err + } - state.setupStateOnActivate() + state.setupStateOnActivate() - go state.pollUserPageFaults(readyCh) + go state.pollUserPageFaults(readyCh) - <-readyCh + <-readyCh + + case <-time.After(100 * time.Second): + return errors.New("Uffd connection to firecracker timeout") + default: + return errors.New("Failed to start epoller") + } return nil } @@ -396,6 +409,39 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } +func (m *MemoryManager) ListenUffdSocket(uffdSockAddr string) error { + log.Debug("Start listening to uffd socket") + + m.startEpollingOnce.Do(func() { + m.startEpollingCh = make(chan struct{}) + }) + + ln, err := net.Listen("unix", uffdSockAddr) + if err != nil { + log.Errorf("Failed to listen on uffd socket: %v", err) + return errors.New("Failed to listen on uffd socket") + } + defer ln.Close() + + for { + conn, err := ln.Accept() + if err != nil { + log.Printf("Failed to accept connection on uffd socket: %v", err) + continue + } + go func(conn net.Conn) { + defer conn.Close() + if err := ln.Close(); err != nil { + log.Printf("Failed to close uffd socket listener: %v", err) + } + close(m.startEpollingCh) + }(conn) + break + } + + return nil +} + // Deprecated // func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { // logger := log.WithFields(log.Fields{"vmID": vmID}) From e78c5fdf60d349de39e84c7ef28ea8e919501e7f Mon Sep 17 00:00:00 2001 From: char-1ee Date: Wed, 28 Feb 2024 08:31:39 -0700 Subject: [PATCH 17/21] Test --- ctriface/iface.go | 47 ++++++++++++++++++- ctriface/iface_test.go | 15 +++--- ctriface/orch.go | 15 +++--- lg/uni_logger.go | 2 +- memory/manager/manager.go | 79 +++++--------------------------- memory/manager/snapshot_state.go | 57 +++++++++++++---------- scripts/clean_fcctr.sh | 3 ++ 7 files changed, 111 insertions(+), 107 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index 298546742..30fb0bc5a 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -25,6 +25,7 @@ package ctriface import ( "context" "encoding/json" + "net" "os" "os/exec" "path/filepath" @@ -505,6 +506,9 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID BackendPath: snap.GetMemFilePath(), } + var sendfdConn *net.UnixConn + uffdListenerCh := make(chan struct{}, 1) + if o.GetUPFEnabled() { logger.Debug("TEST: UPF is enabled") conf.MemBackend.BackendType = uffdBackend @@ -517,6 +521,44 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID if err := o.memoryManager.FetchState(originVmID); err != nil { return nil, nil, err } + + logger.Debug("TEST: start listening to uffd socket") + if _, err := os.Stat(conf.MemBackend.BackendPath); err == nil { + os.Remove(conf.MemBackend.BackendPath) + } + + // connChan := make(chan *net.UnixConn, 1) + errChan := make(chan error, 1) + go func() { + listener, err := net.Listen("unix", conf.MemBackend.BackendPath) + if err != nil { + errChan <- errors.Wrapf(err, "failed to listen to uffd socket") + return + // return nil, nil, errors.Wrapf(err, "failed to listen to uffd socket") + } + defer listener.Close() + + logger.Debug("Listening ...") + conn, err := listener.Accept() + if err != nil { + errChan <- errors.Wrapf(err, "failed to accept connection") + return + // return nil, nil, errors.Wrapf(err, "failed to accept connection") + } + + sendfdConn, _ = conn.(*net.UnixConn) + close(uffdListenerCh) + + // connChan <- sendfdConn + }() + + // select { + // case sendfdConn = <-connChan: + // logger.Debug("Connection accepted and type-asserted to *net.UnixConn") + // case err := <-errChan: + // logger.Errorf("Error occurred: %v\n", err) + // } + time.Sleep(10 * time.Second) } tStart = time.Now() @@ -556,12 +598,15 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID }() logger.Debug("TEST: CreatVM request sent") + <-loadDone if o.GetUPFEnabled() { logger.Debug("TEST: Registering VM with snap with the memory manager") + <-uffdListenerCh + stateCfg := manager.SnapshotStateCfg{ VMID: vmID, GuestMemPath: o.getMemoryFile(vmID), @@ -577,7 +622,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID } logger.Debug("TEST: activate VM in mm") - if activateErr = o.memoryManager.Activate(vmID); activateErr != nil { + if activateErr = o.memoryManager.Activate(vmID, sendfdConn); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } } diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index d2b7c7e8b..cdfce8700 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -33,8 +33,6 @@ import ( log "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "github.com/vhive-serverless/vhive/snapshotting" - - "github.com/vhive-serverless/vhive/lg" ) // TODO: Make it impossible to use lazy mode without UPF @@ -82,38 +80,41 @@ func TestStartSnapStop(t *testing.T) { vmID := "2" + log.Debug("STEP: StartVM") _, _, err := orch.StartVM(ctx, vmID, testImageName) require.NoError(t, err, "Failed to start VM") + log.Debug("STEP: PauseVM") err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM") + log.Debug("STEP: NewSnapshot and CreateSnapshot") snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") - // _, err = orch.ResumeVM(ctx, vmID) - // require.NoError(t, err, "Failed to resume VM after created snapshot") - + log.Debug("STEP: StopSingleVM") err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") originVmID := vmID vmID = "3" + log.Debug("STEP: LoadSnapshot") _, _, err = orch.LoadSnapshot(ctx, originVmID, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") - log.Debug("TEST: LoadSnapshot completed") - lg.UniLogger.Println("This is a test") + log.Debug("STEP: ResumeVM") _, err = orch.ResumeVM(ctx, vmID) require.NoError(t, err, "Failed to resume VM") time.Sleep(30 * time.Second) + log.Debug("STEP: StopeSingleVM") err = orch.StopSingleVM(ctx, vmID) require.NoError(t, err, "Failed to stop VM") + log.Debug("STEP: Cleanup") _ = snap.Cleanup() orch.Cleanup() } diff --git a/ctriface/orch.go b/ctriface/orch.go index 33394c4c2..cf7618776 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -123,18 +123,21 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption) } if o.GetUPFEnabled() { - file, err := os.Create(o.uffdSockAddr) - if err != nil { - log.Fatalf("Failed to create socket file: %v", err) - } - defer file.Close() + // file, err := os.Create(o.uffdSockAddr) + // if err != nil { + // log.Fatalf("Failed to create socket file: %v", err) + // } + // defer file.Close() + // lg.UniLogger.Println("TEST: created the uffd sock addr") managerCfg := manager.MemoryManagerCfg{ MetricsModeOn: o.isMetricsMode, UffdSockAddr: o.uffdSockAddr, } o.memoryManager = manager.NewMemoryManager(managerCfg) - go o.memoryManager.ListenUffdSocket(o.uffdSockAddr) + + // lg.UniLogger.Println("TEST: created a new memory manager. Start listen uffd socket") + // go o.memoryManager.ListenUffdSocket(o.uffdSockAddr) } log.Info("Creating containerd client") diff --git a/lg/uni_logger.go b/lg/uni_logger.go index b8decb6a5..f626cf650 100644 --- a/lg/uni_logger.go +++ b/lg/uni_logger.go @@ -9,7 +9,7 @@ import ( var UniLogger *log.Logger func init() { - file, err := os.OpenFile("output.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + file, err := os.OpenFile("uni_output.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) if err != nil { log.Fatalln("Failed to open log file:", err) } diff --git a/memory/manager/manager.go b/memory/manager/manager.go index b694abe8c..b96acd5d9 100644 --- a/memory/manager/manager.go +++ b/memory/manager/manager.go @@ -149,7 +149,7 @@ func (m *MemoryManager) DeregisterVM(vmID string) error { } // Activate Creates an epoller to serve page faults for the VM -func (m *MemoryManager) Activate(vmID string) error { +func (m *MemoryManager) Activate(vmID string, conn *net.UnixConn) error { logger := log.WithFields(log.Fields{"vmID": vmID}) logger.Debug("Activating instance in the memory manager") @@ -164,14 +164,6 @@ func (m *MemoryManager) Activate(vmID string) error { logger.Debug("TEST: Activate: fetch snapstate by vmID for UFFD") - // originID, ok := m.origins[vmID] - - // if !ok { - // logger.Debug("TEST: not loaded from snapshot") - // } - - // state, ok = m.instances[originID] - state, ok = m.instances[vmID] if !ok { @@ -187,29 +179,21 @@ func (m *MemoryManager) Activate(vmID string) error { return errors.New("VM already active") } - select { - case <-m.startEpollingCh: - if err := state.mapGuestMemory(); err != nil { - logger.Error("Failed to map guest memory") - return err - } - - if err := state.getUFFD(); err != nil { - logger.Error("Failed to get uffd") - return err - } + if err := state.mapGuestMemory(); err != nil { + logger.Error("Failed to map guest memory") + return err + } - state.setupStateOnActivate() + if err := state.getUFFD(conn); err != nil { + logger.Error("Failed to get uffd") + return err + } - go state.pollUserPageFaults(readyCh) + state.setupStateOnActivate() - <-readyCh + go state.pollUserPageFaults(readyCh) - case <-time.After(100 * time.Second): - return errors.New("Uffd connection to firecracker timeout") - default: - return errors.New("Failed to start epoller") - } + <-readyCh return nil } @@ -229,12 +213,6 @@ func (m *MemoryManager) FetchState(vmID string) error { m.Lock() - // originID, ok := m.origins[vmID] - // if !ok { - // logger.Debug("TEST: not loaded from snapshot") - // } - // state, ok = m.instances[originID] - state, ok = m.instances[vmID] if !ok { m.Unlock() @@ -409,39 +387,6 @@ func (m *MemoryManager) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, erro return state.latencyMetrics, nil } -func (m *MemoryManager) ListenUffdSocket(uffdSockAddr string) error { - log.Debug("Start listening to uffd socket") - - m.startEpollingOnce.Do(func() { - m.startEpollingCh = make(chan struct{}) - }) - - ln, err := net.Listen("unix", uffdSockAddr) - if err != nil { - log.Errorf("Failed to listen on uffd socket: %v", err) - return errors.New("Failed to listen on uffd socket") - } - defer ln.Close() - - for { - conn, err := ln.Accept() - if err != nil { - log.Printf("Failed to accept connection on uffd socket: %v", err) - continue - } - go func(conn net.Conn) { - defer conn.Close() - if err := ln.Close(); err != nil { - log.Printf("Failed to close uffd socket listener: %v", err) - } - close(m.startEpollingCh) - }(conn) - break - } - - return nil -} - // Deprecated // func (m *MemoryManager) GetUPFSockPath(vmID string, isSnapshotReady bool) (string, error) { // logger := log.WithFields(log.Fields{"vmID": vmID}) diff --git a/memory/manager/snapshot_state.go b/memory/manager/snapshot_state.go index 34e041918..1c70720e8 100644 --- a/memory/manager/snapshot_state.go +++ b/memory/manager/snapshot_state.go @@ -28,7 +28,6 @@ package manager import "C" import ( - "context" "encoding/binary" "errors" "fmt" @@ -134,37 +133,45 @@ func (s *SnapshotState) setupStateOnActivate() { } } -func (s *SnapshotState) getUFFD() error { - var d net.Dialer - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() +func (s *SnapshotState) getUFFD(sendfdConn *net.UnixConn) error { + // var d net.Dialer + // ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + // defer cancel() - for { - c, err := d.DialContext(ctx, "unix", s.InstanceSockAddr) - if err != nil { - if ctx.Err() != nil { - log.Error("Failed to dial within the context timeout") - return err - } - time.Sleep(1 * time.Millisecond) - continue - } - log.Debugf("TEST: Dial uffd socket done: %s", s.InstanceSockAddr) + // for { + // c, err := d.DialContext(ctx, "unix", s.InstanceSockAddr) + // if err != nil { + // if ctx.Err() != nil { + // log.Error("Failed to dial within the context timeout") + // return err + // } + // time.Sleep(1 * time.Millisecond) + // continue + // } - defer c.Close() + // defer c.Close() - sendfdConn := c.(*net.UnixConn) + // sendfdConn := c.(*net.UnixConn) - fs, err := fd.Get(sendfdConn, 1, []string{"a file"}) - if err != nil { - log.Error("Failed to receive the uffd") - return err - } + // fs, err := fd.Get(sendfdConn, 1, []string{"a file"}) + // if err != nil { + // log.Error("Failed to receive the uffd") + // return err + // } - s.userFaultFD = fs[0] + // s.userFaultFD = fs[0] - return nil + // return nil + // } + + fs, err := fd.Get(sendfdConn, 1, []string{"a file"}) + if err != nil { + log.Error("Failed to receive the uffd") + return err } + + s.userFaultFD = fs[0] + return nil } func (s *SnapshotState) processMetrics() { diff --git a/scripts/clean_fcctr.sh b/scripts/clean_fcctr.sh index 01cc1f06c..61c30ea52 100755 --- a/scripts/clean_fcctr.sh +++ b/scripts/clean_fcctr.sh @@ -80,5 +80,8 @@ sudo rm /var/lib/cni/networks/fcnet*/19* || echo clean already echo Cleaning snapshots sudo rm -rf /fccd/snapshots/* +echo Cleaning UFFD socket +sudo rm -f /tmp/uffd.sock + echo Creating a fresh devmapper source $DIR/create_devmapper.sh From e0bef9a4125ddedc64fd7d30bd41789dc5a7ced7 Mon Sep 17 00:00:00 2001 From: char-1ee Date: Mon, 4 Mar 2024 02:31:25 -0500 Subject: [PATCH 18/21] Parse params from firecracker via socket Signed-off-by: char-1ee --- ctriface/iface.go | 26 ++++-------- memory/manager/snapshot_state.go | 72 +++++++++++++++++--------------- 2 files changed, 45 insertions(+), 53 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index 30fb0bc5a..6b19f445f 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -527,38 +527,26 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID os.Remove(conf.MemBackend.BackendPath) } - // connChan := make(chan *net.UnixConn, 1) - errChan := make(chan error, 1) go func() { listener, err := net.Listen("unix", conf.MemBackend.BackendPath) if err != nil { - errChan <- errors.Wrapf(err, "failed to listen to uffd socket") + logger.Error("failed to listen to uffd socket") return - // return nil, nil, errors.Wrapf(err, "failed to listen to uffd socket") } defer listener.Close() - + logger.Debug("Listening ...") conn, err := listener.Accept() if err != nil { - errChan <- errors.Wrapf(err, "failed to accept connection") - return - // return nil, nil, errors.Wrapf(err, "failed to accept connection") + logger.Error("failed to accept connection to uffd socket") + return } - sendfdConn, _ = conn.(*net.UnixConn) + sendfdConn, _ = conn.(*net.UnixConn) close(uffdListenerCh) - - // connChan <- sendfdConn }() - // select { - // case sendfdConn = <-connChan: - // logger.Debug("Connection accepted and type-asserted to *net.UnixConn") - // case err := <-errChan: - // logger.Errorf("Error occurred: %v\n", err) - // } - time.Sleep(10 * time.Second) + time.Sleep(10 * time.Second) // TODO: sleep for 10 seconds to wait for the uffd socket to be ready } tStart = time.Now() @@ -622,7 +610,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID } logger.Debug("TEST: activate VM in mm") - if activateErr = o.memoryManager.Activate(vmID, sendfdConn); activateErr != nil { + if activateErr = o.memoryManager.Activate(originVmID, sendfdConn); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } } diff --git a/memory/manager/snapshot_state.go b/memory/manager/snapshot_state.go index 1c70720e8..a0bb7d076 100644 --- a/memory/manager/snapshot_state.go +++ b/memory/manager/snapshot_state.go @@ -29,6 +29,7 @@ import "C" import ( "encoding/binary" + "encoding/json" "errors" "fmt" "net" @@ -39,7 +40,6 @@ import ( "syscall" "time" - "github.com/ftrvxmtrx/fd" log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -133,44 +133,49 @@ func (s *SnapshotState) setupStateOnActivate() { } } -func (s *SnapshotState) getUFFD(sendfdConn *net.UnixConn) error { - // var d net.Dialer - // ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - // defer cancel() - - // for { - // c, err := d.DialContext(ctx, "unix", s.InstanceSockAddr) - // if err != nil { - // if ctx.Err() != nil { - // log.Error("Failed to dial within the context timeout") - // return err - // } - // time.Sleep(1 * time.Millisecond) - // continue - // } - - // defer c.Close() - - // sendfdConn := c.(*net.UnixConn) +type GuestRegionUffdMapping struct { + BaseHostVirtAddr uint64 `json:"base_host_virt_addr"` + Size uint64 `json:"size"` + Offset uint64 `json:"offset"` + PageSizeKiB uint64 `json:"page_size_kib"` +} - // fs, err := fd.Get(sendfdConn, 1, []string{"a file"}) - // if err != nil { - // log.Error("Failed to receive the uffd") - // return err - // } +func (s *SnapshotState) getUFFD(sendfdConn *net.UnixConn) error { + buff := make([]byte, 256) // set a maximum buffer size + oobBuff := make([]byte, unix.CmsgSpace(4)) - // s.userFaultFD = fs[0] + n, oobn, _, _, err := sendfdConn.ReadMsgUnix(buff, oobBuff) + if err != nil { + return fmt.Errorf("error reading message: %w", err) + } + buff = buff[:n] - // return nil - // } + var fd int + if oobn > 0 { + scms, err := unix.ParseSocketControlMessage(oobBuff[:oobn]) + if err != nil { + return fmt.Errorf("error parsing socket control message: %w", err) + } + for _, scm := range scms { + fds, err := unix.ParseUnixRights(&scm) + if err != nil { + return fmt.Errorf("error parsing unix rights: %w", err) + } + if len(fds) > 0 { + fd = fds[0] // Assuming only one fd is sent. + break + } + } + } + userfaultFD := os.NewFile(uintptr(fd), "userfaultfd") - fs, err := fd.Get(sendfdConn, 1, []string{"a file"}) - if err != nil { - log.Error("Failed to receive the uffd") - return err + var mapping []GuestRegionUffdMapping + if err := json.Unmarshal(buff, &mapping); err != nil { + return fmt.Errorf("error unmarshaling data: %w", err) } - s.userFaultFD = fs[0] + s.startAddress = mapping[0].BaseHostVirtAddr + s.userFaultFD = userfaultFD return nil } @@ -401,7 +406,6 @@ func (s *SnapshotState) servePageFault(fd int, address uint64) error { s.firstPageFaultOnce.Do( func() { - s.startAddress = address log.Debugf("TEST: first page fault address %d", address) if s.isRecordReady && !s.IsLazyMode { From f294880b17c9fbbac8473a9a02b528f8a94f380b Mon Sep 17 00:00:00 2001 From: char-1ee Date: Thu, 14 Mar 2024 23:06:11 -0400 Subject: [PATCH 19/21] Update loadsnapshot params Signed-off-by: char-1ee --- bin/containerd-shim-aws-firecracker | 4 +- bin/default-rootfs.img | 4 +- bin/firecracker-containerd | 4 +- bin/firecracker-ctr | 4 +- ctriface/iface.go | 106 +++++++++++++++------------- 5 files changed, 65 insertions(+), 57 deletions(-) diff --git a/bin/containerd-shim-aws-firecracker b/bin/containerd-shim-aws-firecracker index 84c3ba700..d02c5f7be 100755 --- a/bin/containerd-shim-aws-firecracker +++ b/bin/containerd-shim-aws-firecracker @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9595863ca1a4903d7cd0715a5da046e1ce4d88c76bd58c9deef586b583bed79a -size 36354528 +oid sha256:593f7fff0ae8512859e08bd129375da10c13088d836cbaeb1608847b72c1cf28 +size 36382664 diff --git a/bin/default-rootfs.img b/bin/default-rootfs.img index e2bfac4d5..5755b47a4 100644 --- a/bin/default-rootfs.img +++ b/bin/default-rootfs.img @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a2e7504d702db942e88158547c1076db786f861fb0b69ec96eb27716e1e37b1 -size 73318400 +oid sha256:f042d5db9797c16255db5a32d9644e8cad838a2ef46da9c08a6ae38a24111f97 +size 73342976 diff --git a/bin/firecracker-containerd b/bin/firecracker-containerd index 2334c5121..464134b33 100755 --- a/bin/firecracker-containerd +++ b/bin/firecracker-containerd @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4450b3c8e9cb2db1a193cd2b9594eb90054cdbdae1d837a7f426b4b0d83950f -size 72445304 +oid sha256:257ee96d4b2cd4a04f61833eafc31e2d814bb923e42ee0386077873f082aaa07 +size 72481176 diff --git a/bin/firecracker-ctr b/bin/firecracker-ctr index 3bfb99c22..f171ddc4b 100755 --- a/bin/firecracker-ctr +++ b/bin/firecracker-ctr @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37487275ed6a08f4e759d9847435537bf52f20c207087e3e4226e599de8bae72 -size 35276648 +oid sha256:2f60c7f0984dfdba666c9159761fe93357d6558dbbe7c3326bc4959b3bdf72b6 +size 35333240 diff --git a/ctriface/iface.go b/ctriface/iface.go index 6b19f445f..0412c2b3a 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -458,7 +458,7 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID loadSnapshotMetric *metrics.Metric = metrics.NewMetric() tStart time.Time loadErr, activateErr error - loadDone = make(chan int) + // loadDone = make(chan int) ) logger := log.WithFields(log.Fields{"vmID": vmID}) @@ -505,9 +505,12 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID BackendType: fileBackend, BackendPath: snap.GetMemFilePath(), } + conf.ResumeVM = true + conf.EnableDiffSnapshots = false var sendfdConn *net.UnixConn - uffdListenerCh := make(chan struct{}, 1) + // uffdListenerCh := make(chan struct{}, 1) + var listener net.Listener if o.GetUPFEnabled() { logger.Debug("TEST: UPF is enabled") @@ -527,73 +530,76 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID os.Remove(conf.MemBackend.BackendPath) } - go func() { - listener, err := net.Listen("unix", conf.MemBackend.BackendPath) - if err != nil { - logger.Error("failed to listen to uffd socket") - return - } - defer listener.Close() + // =============================================================== + listener, err = net.Listen("unix", conf.MemBackend.BackendPath) + if err != nil { + logger.Error("failed to listen to uffd socket") + return + } + // defer listener.Close() - logger.Debug("Listening ...") - conn, err := listener.Accept() - if err != nil { - logger.Error("failed to accept connection to uffd socket") - return - } + // logger.Debug("Listening ...") + // conn, err := listener.Accept() + // if err != nil { + // logger.Error("failed to accept connection to uffd socket") + // return + // } - sendfdConn, _ = conn.(*net.UnixConn) - close(uffdListenerCh) - }() + // sendfdConn, _ = conn.(*net.UnixConn) + // close(uffdListenerCh) - time.Sleep(10 * time.Second) // TODO: sleep for 10 seconds to wait for the uffd socket to be ready + // time.Sleep(10 * time.Second) // TODO: sleep for 10 seconds to wait for the uffd socket to be ready } tStart = time.Now() - go func() { - defer close(loadDone) - - confStr, _ := json.Marshal(conf) - logger.Debugf("TEST: CreateVM request: %s", confStr) + confStr, _ := json.Marshal(conf) + logger.Debugf("TEST: CreateVM request: %s", confStr) - if _, loadErr := o.fcClient.CreateVM(ctx, conf); loadErr != nil { - logger.Error("Failed to load snapshot of the VM: ", loadErr) - logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) - files, err := os.ReadDir(filepath.Dir(snap.GetSnapshotFilePath())) - if err != nil { - logger.Error(err) - } - - snapFiles := "" - for _, f := range files { - snapFiles += f.Name() + ", " - } + if _, loadErr := o.fcClient.CreateVM(ctx, conf); loadErr != nil { + logger.Error("Failed to load snapshot of the VM: ", loadErr) + logger.Errorf("snapFilePath: %s, memFilePath: %s, newSnapshotPath: %s", snap.GetSnapshotFilePath(), snap.GetMemFilePath(), containerSnap.GetDevicePath()) + files, err := os.ReadDir(filepath.Dir(snap.GetSnapshotFilePath())) + if err != nil { + logger.Error(err) + } - logger.Error(snapFiles) + snapFiles := "" + for _, f := range files { + snapFiles += f.Name() + ", " + } - files, _ = os.ReadDir(filepath.Dir(containerSnap.GetDevicePath())) - if err != nil { - logger.Error(err) - } + logger.Error(snapFiles) - snapFiles = "" - for _, f := range files { - snapFiles += f.Name() + ", " - } - logger.Error(snapFiles) + files, _ = os.ReadDir(filepath.Dir(containerSnap.GetDevicePath())) + if err != nil { + logger.Error(err) } - }() + snapFiles = "" + for _, f := range files { + snapFiles += f.Name() + ", " + } + logger.Error(snapFiles) + } logger.Debug("TEST: CreatVM request sent") - <-loadDone + // <-loadDone if o.GetUPFEnabled() { + logger.Debug("Listening ...") + conn, err := listener.Accept() // TODO: a question, must accept() first before connect()? + if err != nil { + logger.Error("failed to accept connection to uffd socket") + return + } + sendfdConn, _ = conn.(*net.UnixConn) + listener.Close() + // close(uffdListenerCh) logger.Debug("TEST: Registering VM with snap with the memory manager") - <-uffdListenerCh + // <-uffdListenerCh stateCfg := manager.SnapshotStateCfg{ VMID: vmID, @@ -613,6 +619,8 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID if activateErr = o.memoryManager.Activate(originVmID, sendfdConn); activateErr != nil { logger.Warn("Failed to activate VM in the memory manager", activateErr) } + + // time.Sleep(30 * time.Minute) // pause to see fc logs } // <-loadDone From f3a3116c511a4f1bf30d53b050dcfd2364ba2c0b Mon Sep 17 00:00:00 2001 From: bchun001 Date: Thu, 25 Apr 2024 01:12:26 -0600 Subject: [PATCH 20/21] fix: changed firecracker-runtime config so that debug --- configs/firecracker-containerd/firecracker-runtime.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/firecracker-containerd/firecracker-runtime.json b/configs/firecracker-containerd/firecracker-runtime.json index 0986eb94d..d6f2bf977 100644 --- a/configs/firecracker-containerd/firecracker-runtime.json +++ b/configs/firecracker-containerd/firecracker-runtime.json @@ -4,5 +4,6 @@ "kernel_args": "console=ttyS0 noapic reboot=k panic=1 pci=off nomodules ro systemd.journald.forward_to_console systemd.unit=firecracker.target init=/sbin/overlay-init", "root_drive": "/var/lib/firecracker-containerd/runtime/default-rootfs.img", "cpu_template": "T2", - "log_levels": ["info"] + "log_levels": ["debug"], + "debug": true } \ No newline at end of file From a630027cc2a174ed29664e7fb497875d639ac8bc Mon Sep 17 00:00:00 2001 From: BenChun <67034610+BenjaminChun@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:24:18 +0800 Subject: [PATCH 21/21] tmp: remove these fields while xj updates the binaries --- ctriface/iface.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctriface/iface.go b/ctriface/iface.go index 0412c2b3a..9271e1f04 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -505,8 +505,8 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, originVmID string, vmID BackendType: fileBackend, BackendPath: snap.GetMemFilePath(), } - conf.ResumeVM = true - conf.EnableDiffSnapshots = false + // conf.ResumeVM = true + // conf.EnableDiffSnapshots = false var sendfdConn *net.UnixConn // uffdListenerCh := make(chan struct{}, 1)