From 181f95a5c7d7b778a76ef7cbd3261ff310f9857d Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev
Date: Tue, 4 Jun 2024 13:57:17 +0800
Subject: [PATCH] fix(godeltaprof): fix mutex tests on gotip (#108)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #103

This PR fixes an issue where a heap/mutex profile contains multiple records with identical stacks, by accumulating the values of duplicate stacks before the delta computation. The issue was found in the mutex tests: Go tip started to produce two profile records for the same mutex (see the linked issue for more info). I'm not sure whether the issue can be triggered for heap profiles.

The PR turned out big because the test helpers were refactored for reuse. I can split it into 2 or 3 smaller PRs if needed.

This PR also drops support for Go 1.16 and Go 1.17, since I wanted to use generics in internal/pprof/map.go.

goos: darwin
goarch: arm64
pkg: github.com/grafana/pyroscope-go/godeltaprof/compat

                                        │   old.txt   │               new.txt                │
                                        │   sec/op    │    sec/op     vs base                │
HeapCompression-8                         691.1µ ± 0%   729.8µ ± 65%   +5.59% (p=0.002 n=6)
MutexCompression/ScalerMutexProfile-8     640.6µ ± 1%   693.8µ ±  9%   +8.30% (p=0.002 n=6)
MutexCompression/ScalerBlockProfile-8     643.2µ ± 1%   690.3µ ±  1%   +7.33% (p=0.002 n=6)
HeapDelta-8                               76.26µ ± 1%  113.92µ ±  2%  +49.38% (p=0.002 n=6)
MutexDelta/ScalerMutexProfile-8           48.98µ ± 1%   81.11µ ±  2%  +65.59% (p=0.002 n=6)
MutexDelta/ScalerBlockProfile-8           49.03µ ± 1%   81.14µ ±  1%  +65.49% (p=0.002 n=6)
geomean                                   193.3µ        253.0µ        +30.87%

                                        │   old.txt    │               new.txt                │
                                        │     B/op     │     B/op      vs base                │
HeapCompression-8                         974.1Ki ± 0%   973.9Ki ± 0%   -0.03% (p=0.002 n=6)
MutexCompression/ScalerMutexProfile-8     974.6Ki ± 0%   974.3Ki ± 0%   -0.02% (p=0.002 n=6)
MutexCompression/ScalerBlockProfile-8     974.6Ki ± 0%   974.3Ki ± 0%   -0.03% (p=0.002 n=6)
HeapDelta-8                               333.00 ± 0%     53.00 ± 0%   -84.08% (p=0.002 n=6)
MutexDelta/ScalerMutexProfile-8           312.00 ± 0%     30.00 ± 0%   -90.38% (p=0.002 n=6)
MutexDelta/ScalerBlockProfile-8           312.00 ± 0%     30.00 ± 0%   -90.38% (p=0.002 n=6)
geomean                                   17.42Ki        5.874Ki       -66.28%

                                        │  old.txt   │              new.txt               │
                                        │ allocs/op  │  allocs/op   vs base               │
HeapCompression-8                          788.0 ± 0%    787.0 ± 0%   -0.13% (p=0.002 n=6)
MutexCompression/ScalerMutexProfile-8      788.0 ± 0%    787.0 ± 0%   -0.13% (p=0.002 n=6)
MutexCompression/ScalerBlockProfile-8      788.0 ± 0%    787.0 ± 0%   -0.13% (p=0.002 n=6)
HeapDelta-8                                2.000 ± 0%    1.000 ± 0%  -50.00% (p=0.002 n=6)
MutexDelta/ScalerMutexProfile-8            2.000 ± 0%    1.000 ± 0%  -50.00% (p=0.002 n=6)
MutexDelta/ScalerBlockProfile-8            2.000 ± 0%    1.000 ± 0%  -50.00% (p=0.002 n=6)
geomean                                    39.70         28.05       -29.33%

So it looks like the cost of deduplication is about 5-8% of the overall time (including compression), which is not that bad.
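For illustration, here is a minimal, self-contained sketch of the accumulate-then-delta scheme this patch introduces in delta_heap.go and delta_mutex.go. It is not the library code: stack keys are simplified to strings, profile scaling is omitted, and the record/entry types are invented for the example.

    package main

    import "fmt"

    // record stands in for runtime.BlockProfileRecord: a stack identity plus
    // monotonically growing totals, possibly reported twice for one stack.
    type record struct {
    	stack  string // simplified stand-in for the real [32]uintptr stack
    	count  int64
    	cycles int64
    }

    // entry mirrors profMapEntry's new prev/acc pair: acc accumulates the
    // current snapshot, prev remembers the totals emitted last time.
    type entry struct {
    	accCount, accCycles   int64
    	prevCount, prevCycles int64
    }

    // delta does the two passes the patch adds: pass 1 sums duplicate records
    // into entry.acc, pass 2 emits acc-prev once per stack and resets acc.
    func delta(m map[string]*entry, records []record) map[string][2]int64 {
    	for _, r := range records { // pass 1: deduplicate equal stacks
    		e := m[r.stack]
    		if e == nil {
    			e = &entry{}
    			m[r.stack] = e
    		}
    		e.accCount += r.count
    		e.accCycles += r.cycles
    	}
    	out := map[string][2]int64{}
    	for _, r := range records { // pass 2: delta against previous totals
    		e := m[r.stack]
    		if e.accCount == 0 && e.accCycles == 0 {
    			continue // zero, or already emitted for this snapshot
    		}
    		dCount := e.accCount - e.prevCount
    		dCycles := e.accCycles - e.prevCycles
    		e.prevCount, e.prevCycles = e.accCount, e.accCycles
    		e.accCount, e.accCycles = 0, 0
    		if dCount < 0 || dCycles < 0 {
    			continue // totals went backwards; skip like the real profiler
    		}
    		out[r.stack] = [2]int64{dCount, dCycles}
    	}
    	return out
    }

    func main() {
    	m := map[string]*entry{}
    	// two records for the same stack "a", as gotip's mutex profile may produce
    	recs := []record{{"a", 239, 239 * 42}, {"b", 42, 42 * 42}, {"a", 7, 7 * 42}}
    	fmt.Println(delta(m, recs)) // map[a:[246 10332] b:[42 1764]]
    	fmt.Println(delta(m, recs)) // map[a:[0 0] b:[0 0]]: nothing changed
    }

The point of the two passes is that a per-stack delta computed record-by-record double-subtracts prev when one stack is reported twice; summing first makes the subtraction well-defined.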
--- .github/workflows/go.yml | 6 +- Makefile | 5 - go.mod | 2 +- go.mod_go16_test.txt | 12 - go.sum | 4 +- go.work.sum | 6 - godeltaprof/compat/compression_test.go | 71 ++---- godeltaprof/compat/delta_test.go | 280 +++++++++++++--------- godeltaprof/compat/reject_order_test.go | 40 ++-- godeltaprof/compat/stackcollapse.go | 92 +++++-- godeltaprof/compat/stub_go23_test.go | 3 + godeltaprof/compat/testdata.go | 198 ++++++++++++--- godeltaprof/go.mod | 4 +- godeltaprof/go.sum | 4 +- godeltaprof/internal/pprof/delta_heap.go | 60 ++++- godeltaprof/internal/pprof/delta_mutex.go | 45 +++- godeltaprof/internal/pprof/map.go | 38 +-- godeltaprof/internal/pprof/stub_go23.go | 2 +- 18 files changed, 554 insertions(+), 318 deletions(-) delete mode 100644 go.mod_go16_test.txt diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3bf470a..846b8d0 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - go: ['1.16', '1.17', '1.18', '1.19', '1.20', '1.21', '1.22', 'tip'] + go: ['1.18', '1.19', '1.20', '1.21', '1.22', 'tip'] steps: - name: Checkout uses: actions/checkout@v3 @@ -21,11 +21,7 @@ jobs: uses: actions/setup-go@v4 with: go-version: ${{ matrix.go }} - - name: Downgrade go.mod for go 1.17 and go 1.16 - if: matrix.go == '1.17' || matrix.go == '1.16' - run: make go/mod_16_for_testing - name: Run go/mod - if: matrix.go != '1.17' && matrix.go != '1.16' run: make go/mod && git diff --exit-code - name: Install Go stable if: matrix.go == 'tip' diff --git a/Makefile b/Makefile index abbaf43..7c2167d 100644 --- a/Makefile +++ b/Makefile @@ -15,8 +15,3 @@ go/mod: cd godeltaprof/ && GO111MODULE=on go mod download cd godeltaprof/ && GO111MODULE=on go mod tidy -.PHONY: go/mod_16_for_testing -go/mod_16_for_testing: - rm -rf godeltaprof/compat/go.mod godeltaprof/compat/go.sum godeltaprof/go.mod godeltaprof/go.sum go.work otelpyroscope/ - cat go.mod_go16_test.txt > go.mod - go mod tidy diff --git a/go.mod b/go.mod index c6ab79c..4fe966d 100644 --- a/go.mod +++ b/go.mod @@ -6,4 +6,4 @@ replace github.com/grafana/pyroscope-go/godeltaprof => ./godeltaprof require github.com/grafana/pyroscope-go/godeltaprof v0.1.6 -require github.com/klauspost/compress v1.17.3 // indirect +require github.com/klauspost/compress v1.17.8 // indirect diff --git a/go.mod_go16_test.txt b/go.mod_go16_test.txt deleted file mode 100644 index 0f0f0dc..0000000 --- a/go.mod_go16_test.txt +++ /dev/null @@ -1,12 +0,0 @@ -module github.com/grafana/pyroscope-go - -go 1.16 - -require ( - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/google/pprof v0.0.0-20231127191134-f3a68a39ae15 - github.com/stretchr/testify v1.7.0 - golang.org/x/mod v0.14.0 // indirect - golang.org/x/tools v0.12.0 - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/go.sum b/go.sum index 0d8e8f5..734f917 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,2 @@ -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= diff --git a/go.work.sum b/go.work.sum index e663de3..2922f3a 100644 --- a/go.work.sum +++ b/go.work.sum @@ -6,17 +6,12 @@ github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3I github.com/chromedp/sysutil v1.0.0/go.mod 
h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk= github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -27,7 +22,6 @@ github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go index e9305e4..b5923e3 100644 --- a/godeltaprof/compat/compression_test.go +++ b/godeltaprof/compat/compression_test.go @@ -2,33 +2,27 @@ package compat import ( "io" - "math/rand" "runtime" "testing" - - "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" ) func BenchmarkHeapCompression(b *testing.B) { - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: true, - LazyMapping: true, - } - dh := new(pprof.DeltaHeapProfiler) - fs := generateMemProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - objSize := fs[0].AllocBytes / fs[0].AllocObjects - nMutations := int(uint(rng.Int63())) % len(fs) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) + b.ResetTimer() for i := 0; i < b.N; i++ { - _ = WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate)) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].AllocObjects += 1 - fs[idx].AllocBytes += objSize - fs[idx].FreeObjects += 1 - fs[idx].FreeBytes += objSize + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. 
"+ + "The bench should be deterministic for better comparision.", v) + } } + _ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate)) + h.mutate(nmutations, fs) } } @@ -43,38 +37,25 @@ func BenchmarkMutexCompression(b *testing.B) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: true, - LazyMapping: true, - } - dh := new(pprof.DeltaMutexProfiler) - fs := generateBlockProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - nMutations := int(uint(rng.Int63())) % len(fs) - oneBlockCycles := fs[0].Cycles / fs[0].Count + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) b.ResetTimer() for i := 0; i < b.N; i++ { - _ = PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].Count += 1 - fs[idx].Cycles += oneBlockCycles + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs) + h.mutate(nmutations, fs) } }) } } - -func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error { - stc := pprof.HeapProfileConfig(rate) - b := pprof.NewProfileBuilder(w, opt, stc) - return dp.WriteHeapProto(b, p, rate) -} - -func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error { - stc := pprof.MutexProfileConfig() - b := pprof.NewProfileBuilder(w, opt, stc) - return d.PrintCountCycleProfile(b, scaler, records) -} diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go index 1dded69..2822ef4 100644 --- a/godeltaprof/compat/delta_test.go +++ b/godeltaprof/compat/delta_test.go @@ -1,13 +1,14 @@ package compat import ( - "bytes" - "math/rand" - "runtime" - "testing" - + "fmt" + gprofile "github.com/google/pprof/profile" "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" "github.com/stretchr/testify/assert" + "reflect" + "runtime" + "strings" + "testing" ) var ( @@ -15,14 +16,51 @@ var ( stack0Marker string stack1 = [32]uintptr{} stack1Marker string + stack2 = [32]uintptr{} + stack2Marker string + stack3 = [32]uintptr{} + stack4 = [32]uintptr{} ) func init() { - fs := getFunctionPointers() - stack0 = [32]uintptr{fs[0], fs[1]} - stack1 = [32]uintptr{fs[2], fs[3]} - stack0Marker = runtime.FuncForPC(fs[1]).Name() + ";" + runtime.FuncForPC(fs[0]).Name() - stack1Marker = runtime.FuncForPC(fs[3]).Name() + ";" + runtime.FuncForPC(fs[2]).Name() + stack0 = [32]uintptr{ + reflect.ValueOf(assert.Truef).Pointer(), + reflect.ValueOf(assert.CallerInfo).Pointer(), + } + stack1 = [32]uintptr{ + reflect.ValueOf(assert.Condition).Pointer(), + reflect.ValueOf(assert.Conditionf).Pointer(), + } + stack2 = [32]uintptr{ + reflect.ValueOf(runtime.GC).Pointer(), + reflect.ValueOf(runtime.FuncForPC).Pointer(), + reflect.ValueOf(TestDeltaBlockProfile).Pointer(), + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + stack3 = [32]uintptr{ // equal , but difference in runtime + reflect.ValueOf(runtime.GC).Pointer() + 1, + reflect.ValueOf(runtime.FuncForPC).Pointer(), + 
reflect.ValueOf(TestDeltaBlockProfile).Pointer(), + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + + stack4 = [32]uintptr{ // equal , but difference in non runtime frame + reflect.ValueOf(runtime.GC).Pointer(), + reflect.ValueOf(runtime.FuncForPC).Pointer(), + reflect.ValueOf(TestDeltaBlockProfile).Pointer() + 1, + reflect.ValueOf(TestDeltaHeap).Pointer(), + } + marker := func(stk []uintptr) string { + res := []string{} + for i := range stk { + f := stk[len(stk)-1-i] + res = append(res, runtime.FuncForPC(f).Name()) + } + return strings.Join(res, ";") + } + stack0Marker = marker(stack0[:2]) + stack1Marker = marker(stack1[:2]) + stack2Marker = marker(stack2[2:4]) } func TestDeltaHeap(t *testing.T) { @@ -40,56 +78,41 @@ func TestDeltaHeap(t *testing.T) { const testMemProfileRate = 524288 const testObjectSize = 327680 - dh := new(pprof.DeltaHeapProfiler) - opt := new(pprof.ProfileBuilderOptions) - dump := func(r ...runtime.MemProfileRecord) *bytes.Buffer { - buf := bytes.NewBuffer(nil) - err := WriteHeapProto(dh, opt, buf, r, testMemProfileRate) - assert.NoError(t, err) - return buf - } - r := func(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord { - return runtime.MemProfileRecord{ - AllocObjects: AllocObjects, - AllocBytes: AllocBytes, - FreeBytes: FreeBytes, - FreeObjects: FreeObjects, - Stack0: s, - } - } + h := newHeapTestHelper() + h.rate = testMemProfileRate - p1 := dump( - r(0, 0, 0, 0, stack0), - r(0, 0, 0, 0, stack1), + p1 := h.dump( + h.r(0, 0, 0, 0, stack0), + h.r(0, 0, 0, 0, stack1), ) expectEmptyProfile(t, p1) - p2 := dump( - r(5, 5*testObjectSize, 0, 0, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p2 := h.dump( + h.r(5, 5*testObjectSize, 0, 0, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectStackFrames(t, p2, stack0Marker, 10, 3525422, 10, 3525422) expectStackFrames(t, p2, stack1Marker, 6, 2115253, 0, 0) for i := 0; i < 3; i++ { // if we write same data, stack0 is in use, stack1 should not be present - p3 := dump( - r(5, 5*testObjectSize, 0, 0, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p3 := h.dump( + h.r(5, 5*testObjectSize, 0, 0, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectStackFrames(t, p3, stack0Marker, 0, 0, 10, 3525422) expectNoStackFrames(t, p3, stack1Marker) } - p4 := dump( - r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p4 := h.dump( + h.r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) expectEmptyProfile(t, p4) - p5 := dump( - r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0), - r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), + p5 := h.dump( + h.r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0), + h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1), ) // note, this value depends on scale order, it currently tests the current implementation, but we may change it // to alloc objects to be scale(8) - scale(5) = 17-10 = 7 @@ -98,8 +121,6 @@ func TestDeltaHeap(t *testing.T) { } func TestDeltaBlockProfile(t *testing.T) { - cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9 - for i, scaler := range mutexProfileScalers { name := "ScalerMutexProfile" if i == 1 { @@ -110,56 +131,35 @@ func TestDeltaBlockProfile(t *testing.T) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - opt := 
new(pprof.ProfileBuilderOptions) - - scale := func(rcount, rcycles int64) (int64, int64) { - count, nanosec := pprof.ScaleMutexProfile(scaler, rcount, float64(rcycles)/cpuGHz) - inanosec := int64(nanosec) - return count, inanosec - } - dump := func(r ...runtime.BlockProfileRecord) *bytes.Buffer { - buf := bytes.NewBuffer(nil) - err := PrintCountCycleProfile(dh, opt, buf, scaler, r) - assert.NoError(t, err) - return buf - } - r := func(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord { - return runtime.BlockProfileRecord{ - Count: count, - Cycles: cycles, - StackRecord: runtime.StackRecord{ - Stack0: s, - }, - } - } + h := newMutexTestHelper() + h.scaler = scaler - p1 := dump( - r(0, 0, stack0), - r(0, 0, stack1), + p1 := h.dump( + h.r(0, 0, stack0), + h.r(0, 0, stack1), ) expectEmptyProfile(t, p1) const cycles = 42 - p2 := dump( - r(239, 239*cycles, stack0), - r(0, 0, stack1), + p2 := h.dump( + h.r(239, 239*cycles, stack0), + h.r(0, 0, stack1), ) - count0, nanos0 := scale(239, 239*cycles) + count0, nanos0 := h.scale(239, 239*cycles) expectStackFrames(t, p2, stack0Marker, count0, nanos0) expectNoStackFrames(t, p2, stack1Marker) for j := 0; j < 2; j++ { - p3 := dump( - r(239, 239*cycles, stack0), - r(0, 0, stack1), + p3 := h.dump( + h.r(239, 239*cycles, stack0), + h.r(0, 0, stack1), ) expectEmptyProfile(t, p3) } - count1, nanos1 := scale(240, 240*cycles) - p4 := dump( - r(240, 240*cycles, stack0), + count1, nanos1 := h.scale(240, 240*cycles) + p4 := h.dump( + h.r(240, 240*cycles, stack0), ) expectStackFrames(t, p4, stack0Marker, count1-count0, nanos1-nanos0) expectNoStackFrames(t, p4, stack1Marker) @@ -168,22 +168,23 @@ func TestDeltaBlockProfile(t *testing.T) { } func BenchmarkHeapDelta(b *testing.B) { - dh := new(pprof.DeltaHeapProfiler) - fs := generateMemProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - objSize := fs[0].AllocBytes / fs[0].AllocObjects - nMutations := int(uint(rng.Int63())) % len(fs) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) builder := &noopBuilder{} + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) + b.ResetTimer() for i := 0; i < b.N; i++ { - _ = dh.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate)) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].AllocObjects += 1 - fs[idx].AllocBytes += objSize - fs[idx].FreeObjects += 1 - fs[idx].FreeBytes += objSize + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. 
"+ + "The bench should be deterministic for better comparision.", v) + } } + _ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate)) + h.mutate(nmutations, fs) } } @@ -198,37 +199,98 @@ func BenchmarkMutexDelta(b *testing.B) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - fs := generateBlockProfileRecords(512, 32, 239) - rng := rand.NewSource(239) - nMutations := int(uint(rng.Int63())) % len(fs) - oneBlockCycles := fs[0].Cycles / fs[0].Count + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) builder := &noopBuilder{} + h.rng.Seed(239) + nmutations := int(h.rng.Int63() % int64(len(fs))) b.ResetTimer() for i := 0; i < b.N; i++ { - _ = dh.PrintCountCycleProfile(builder, scaler, fs) - for j := 0; j < nMutations; j++ { - idx := int(uint(rng.Int63())) % len(fs) - fs[idx].Count += 1 - fs[idx].Cycles += oneBlockCycles + if i == 1000 { + v := h.rng.Int63() + if v != 7817861117094116717 { + b.Errorf("unexpected random value: %d. "+ + "The bench should be deterministic for better comparision.", v) + } } + _ = h.dp.PrintCountCycleProfile(builder, scaler, fs) + h.mutate(nmutations, fs) } }) } } -type noopBuilder struct { -} +func TestMutexDuplicates(t *testing.T) { + prev := runtime.SetMutexProfileFraction(-1) + runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(prev) -func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 { - return nil -} -func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) { + h := newMutexTestHelper() + const cycles = 42 + p := h.dump( + h.r(239, 239*cycles, stack0), + h.r(42, 42*cycles, stack1), + h.r(7, 7*cycles, stack0), + ) + expectStackFrames(t, p, stack0Marker, h.scale2(239+7, (239+7)*cycles)...) + expectStackFrames(t, p, stack1Marker, h.scale2(42, (42)*cycles)...) + + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, h.scale2(239+7, (239+7)*cycles)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, h.scale2(42, 42*cycles)...) + + p = h.dump( + h.r(239, 239*cycles, stack0), + h.r(42, 42*cycles, stack1), + h.r(7, 7*cycles, stack0), + ) + expectEmptyProfile(t, p) } -func (b *noopBuilder) Build() { +func TestHeapDuplicates(t *testing.T) { + const testMemProfileRate = 524288 + h := newHeapTestHelper() + h.rate = testMemProfileRate + const blockSize = 1024 + const blockSize2 = 2048 + p := h.dump( + h.r(239, 239*blockSize, 239, 239*blockSize, stack0), + h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0), + h.r(42, 42*blockSize, 42, 42*blockSize, stack1), + h.r(7, 7*blockSize, 7, 7*blockSize, stack0), + h.r(3, 3*blockSize, 3, 3*blockSize, stack2), + h.r(5, 5*blockSize, 5, 5*blockSize, stack3), + h.r(11, 11*blockSize, 11, 11*blockSize, stack4), + ) + pp, err := gprofile.ParseData(p.Bytes()) + assert.NoError(t, err) + scale := func(c, b int) []int64 { + c1, b1 := pprof.ScaleHeapSample(int64(c), int64(b), testMemProfileRate) + return []int64{c1, b1, 0, 0} + } + //expectStackFrames(t, p, stack0Marker, scale(239+7, (239+7)*blockSize)...) + //expectStackFrames(t, p, stack1Marker, scale(42, 42*blockSize)...) + + //printProfile(t, p) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, scale(239+7, (239+7)*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, scale(42, 42*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(3, 3*blockSize)...) 
+ expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(5, 5*blockSize)...) + expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(11, 11*blockSize)...) + assert.Equal(t, 6, len(pp.Sample)) + + p = h.dump( + h.r(239, 239*blockSize, 239, 239*blockSize, stack0), + h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0), + h.r(42, 42*blockSize, 42, 42*blockSize, stack1), + h.r(7, 7*blockSize, 7, 7*blockSize, stack0), + h.r(3, 3*blockSize, 3, 3*blockSize, stack2), + h.r(5, 5*blockSize, 5, 5*blockSize, stack3), + h.r(11, 11*blockSize, 11, 11*blockSize, stack4), + ) + expectEmptyProfile(t, p) } diff --git a/godeltaprof/compat/reject_order_test.go b/godeltaprof/compat/reject_order_test.go index 9bdd658..6266b78 100644 --- a/godeltaprof/compat/reject_order_test.go +++ b/godeltaprof/compat/reject_order_test.go @@ -13,11 +13,10 @@ import ( ) func TestHeapReject(t *testing.T) { - dh := new(pprof.DeltaHeapProfiler) - opt := new(pprof.ProfileBuilderOptions) - fs := generateMemProfileRecords(512, 32, 239) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) p1 := bytes.NewBuffer(nil) - err := WriteHeapProto(dh, opt, p1, fs, int64(runtime.MemProfileRate)) + err := WriteHeapProto(h.dp, h.opt, p1, fs, int64(runtime.MemProfileRate)) assert.NoError(t, err) p1Size := p1.Len() profile, err := gprofile.Parse(p1) @@ -28,7 +27,7 @@ func TestHeapReject(t *testing.T) { t.Log("p1 size", p1Size) p2 := bytes.NewBuffer(nil) - err = WriteHeapProto(dh, opt, p2, fs, int64(runtime.MemProfileRate)) + err = WriteHeapProto(h.dp, h.opt, p2, fs, int64(runtime.MemProfileRate)) assert.NoError(t, err) p2Size := p2.Len() assert.Less(t, p2Size, 1000) @@ -41,15 +40,11 @@ func TestHeapReject(t *testing.T) { } func BenchmarkHeapRejectOrder(b *testing.B) { - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: false, - LazyMapping: true, - } - dh := &pprof.DeltaHeapProfiler{} - fs := generateMemProfileRecords(512, 32, 239) + h := newHeapTestHelper() + fs := h.generateMemProfileRecords(512, 32) b.ResetTimer() for i := 0; i < b.N; i++ { - WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate)) + WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate)) } } @@ -69,11 +64,11 @@ func TestMutexReject(t *testing.T) { runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - dh := new(pprof.DeltaMutexProfiler) - opt := new(pprof.ProfileBuilderOptions) - fs := generateBlockProfileRecords(512, 32, 239) + h := newMutexTestHelper() + h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) p1 := bytes.NewBuffer(nil) - err := PrintCountCycleProfile(dh, opt, p1, scaler, fs) + err := PrintCountCycleProfile(h.dp, h.opt, p1, scaler, fs) assert.NoError(t, err) p1Size := p1.Len() profile, err := gprofile.Parse(p1) @@ -84,7 +79,7 @@ func TestMutexReject(t *testing.T) { t.Log("p1 size", p1Size) p2 := bytes.NewBuffer(nil) - err = PrintCountCycleProfile(dh, opt, p2, scaler, fs) + err = PrintCountCycleProfile(h.dp, h.opt, p2, scaler, fs) assert.NoError(t, err) p2Size := p2.Len() assert.Less(t, p2Size, 1000) @@ -108,16 +103,13 @@ func BenchmarkMutexRejectOrder(b *testing.B) { prevMutexProfileFraction := runtime.SetMutexProfileFraction(-1) runtime.SetMutexProfileFraction(5) defer runtime.SetMutexProfileFraction(prevMutexProfileFraction) - opt := &pprof.ProfileBuilderOptions{ - GenericsFrames: false, - LazyMapping: true, - } - dh := &pprof.DeltaMutexProfiler{} - fs := generateBlockProfileRecords(512, 32, 239) + h := newMutexTestHelper() 
+ h.scaler = scaler + fs := h.generateBlockProfileRecords(512, 32) b.ResetTimer() for i := 0; i < b.N; i++ { - PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs) + PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs) } }) diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go index 7051ab5..8f95c17 100644 --- a/godeltaprof/compat/stackcollapse.go +++ b/godeltaprof/compat/stackcollapse.go @@ -2,7 +2,9 @@ package compat import ( "bytes" + "fmt" "io" + "reflect" "regexp" "sort" "strings" @@ -26,6 +28,16 @@ func expectEmptyProfile(t *testing.T, buffer io.Reader) { assert.Empty(t, ls) } +func printProfile(t *testing.T, p *bytes.Buffer) { + profile, err := gprofile.Parse(bytes.NewBuffer(p.Bytes())) + require.NoError(t, err) + t.Log("==================") + for _, sample := range profile.Sample { + s := pprofSampleStackToString(sample) + t.Logf("%v %v %v\n", s, sample.Value, sample.NumLabel) + } +} + func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) { profile, err := gprofile.ParseData(buffer.Bytes()) require.NoError(t, err) @@ -34,6 +46,7 @@ func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) { } func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, values ...int64) { + fmt.Printf("expectStackFrames: %s %+v\n", sfPattern, values) profile, err := gprofile.ParseData(buffer.Bytes()) require.NoError(t, err) line := findStack(t, stackCollapseProfile(t, profile), sfPattern) @@ -45,6 +58,32 @@ func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, val } } +func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern string, expectedCount int, expectedValues ...int64) { + profile, err := gprofile.ParseData(buffer.Bytes()) + require.NoError(t, err) + cnt := 0 + samples := grepSamples(profile, samplePattern) + for _, s := range samples { + if reflect.DeepEqual(s.Value, expectedValues) { + cnt++ + } + } + assert.Equalf(t, expectedCount, cnt, "expected samples re: %s\n values: %v\n count:%d\n all samples:%+v\n", samplePattern, expectedValues, expectedCount, samples) +} + +func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sample { + rr := regexp.MustCompile(samplePattern) + var samples []*gprofile.Sample + for i := range profile.Sample { + ss := pprofSampleStackToString(profile.Sample[i]) + if !rr.MatchString(ss) { + continue + } + samples = append(samples, profile.Sample[i]) + } + return samples +} + func findStack(t *testing.T, res []stack, re string) *stack { rr := regexp.MustCompile(re) for i, re := range res { @@ -58,23 +97,9 @@ func findStack(t *testing.T, res []stack, re string) *stack { func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack { var ret []stack for _, s := range p.Sample { - var funcs []string - for i := range s.Location { - - loc := s.Location[i] - for _, line := range loc.Line { - f := line.Function - //funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line)) - funcs = append(funcs, f.Name) - } - } - for i := 0; i < len(funcs)/2; i++ { - j := len(funcs) - i - 1 - funcs[i], funcs[j] = funcs[j], funcs[i] - } - + funcs, strSample := pprofSampleStackToStrings(s) ret = append(ret, stack{ - line: strings.Join(funcs, ";"), + line: strSample, funcs: funcs, value: s.Value, }) @@ -97,11 +122,36 @@ func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack { unique = append(unique, s) } - t.Log("============= stackCollapseProfile ================") - for _, s := range unique { - 
t.Log(s.line, s.value) - } - t.Log("===================================================") + //t.Log("============= stackCollapseProfile ================") + //for _, s := range unique { + // t.Log(s.line, s.value) + //} + //t.Log("===================================================") return unique } + +func pprofSampleStackToString(s *gprofile.Sample) string { + _, v := pprofSampleStackToStrings(s) + return v +} + +func pprofSampleStackToStrings(s *gprofile.Sample) ([]string, string) { + var funcs []string + for i := range s.Location { + + loc := s.Location[i] + for _, line := range loc.Line { + f := line.Function + //funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line)) + funcs = append(funcs, f.Name) + } + } + for i := 0; i < len(funcs)/2; i++ { + j := len(funcs) - i - 1 + funcs[i], funcs[j] = funcs[j], funcs[i] + } + + strSample := strings.Join(funcs, ";") + return funcs, strSample +} diff --git a/godeltaprof/compat/stub_go23_test.go b/godeltaprof/compat/stub_go23_test.go index bf495a9..6c2d227 100644 --- a/godeltaprof/compat/stub_go23_test.go +++ b/godeltaprof/compat/stub_go23_test.go @@ -36,6 +36,9 @@ func TestRuntimeCyclesPerSecond(t *testing.T) { checkSignature(t, "runtime", "pprof_cyclesPerSecond", "func runtime.pprof_cyclesPerSecond() int64") + checkSignature(t, "runtime/pprof", + "pprof_cyclesPerSecond", + "func runtime/pprof.pprof_cyclesPerSecond() int64") checkSignature(t, "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof", "runtime_cyclesPerSecond", "func github.com/grafana/pyroscope-go/godeltaprof/internal/pprof.runtime_cyclesPerSecond() int64") diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go index 509ba47..be95c9e 100644 --- a/godeltaprof/compat/testdata.go +++ b/godeltaprof/compat/testdata.go @@ -1,16 +1,13 @@ package compat import ( - "go/types" + "bytes" + "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof" + "github.com/stretchr/testify/assert" + "io" "math/rand" "reflect" "runtime" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "golang.org/x/tools/go/packages" ) func getFunctionPointers() []uintptr { @@ -160,12 +157,12 @@ func getFunctionPointers() []uintptr { } } -func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord { +func (h *heapTestHelper) generateMemProfileRecords(n, depth int) []runtime.MemProfileRecord { var records []runtime.MemProfileRecord - rng := rand.NewSource(int64(seed)) + fs := getFunctionPointers() for i := 0; i < n; i++ { - nobj := int(uint64(rng.Int63())) % 1000000 + nobj := int(uint64(h.rng.Int63())) % 1000000 r := runtime.MemProfileRecord{ AllocObjects: int64(nobj), AllocBytes: int64(nobj * 1024), @@ -173,53 +170,182 @@ func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord { FreeBytes: int64(nobj * 1024), } for j := 0; j < depth; j++ { - r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)] + r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)] } records = append(records, r) } return records } -func generateBlockProfileRecords(n, depth, seed int) []runtime.BlockProfileRecord { +func (h *mutexTestHelper) generateBlockProfileRecords(n, depth int) []runtime.BlockProfileRecord { var records []runtime.BlockProfileRecord - rng := rand.NewSource(int64(seed)) fs := getFunctionPointers() for i := 0; i < n; i++ { - nobj := int(uint64(rng.Int63())) % 1000000 + nobj := int(uint64(h.rng.Int63())) % 1000000 r := runtime.BlockProfileRecord{ Count: int64(nobj), Cycles: int64(nobj * 10), } for j := 0; j < 
depth; j++ { - r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)] + r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)] } records = append(records, r) } return records } -func getFunctions(t testing.TB, pkg string) []*types.Func { - var res []*types.Func - cfg := &packages.Config{ - Mode: packages.NeedImports | packages.NeedExportFile | packages.NeedTypes | packages.NeedSyntax, - Tests: true, +type mutexTestHelper struct { + dp *pprof.DeltaMutexProfiler + opt *pprof.ProfileBuilderOptions + scaler pprof.MutexProfileScaler + rng rand.Source +} + +func newMutexTestHelper() *mutexTestHelper { + res := &mutexTestHelper{ + dp: &pprof.DeltaMutexProfiler{}, + opt: &pprof.ProfileBuilderOptions{ + GenericsFrames: true, + LazyMapping: true, + }, + scaler: pprof.ScalerMutexProfile, + rng: rand.NewSource(239), } - pkgs, err := packages.Load(cfg, pkg) - require.NoError(t, err) - for _, p := range pkgs { - if strings.Contains(p.ID, ".test") { - continue - } - for _, name := range p.Types.Scope().Names() { - f := p.Types.Scope().Lookup(name) - - if f != nil { - ff, ok := f.(*types.Func) - if ok { - res = append(res, ff) - } - } - } + return res + +} + +func (h *mutexTestHelper) scale(rcount, rcycles int64) (int64, int64) { + cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9 + count, nanosec := pprof.ScaleMutexProfile(h.scaler, rcount, float64(rcycles)/cpuGHz) + inanosec := int64(nanosec) + return count, inanosec +} + +func (h *mutexTestHelper) scale2(rcount, rcycles int64) []int64 { + c, n := h.scale(rcount, rcycles) + return []int64{c, n} +} + +func (h *mutexTestHelper) dump(r ...runtime.BlockProfileRecord) *bytes.Buffer { + buf := bytes.NewBuffer(nil) + err := PrintCountCycleProfile(h.dp, h.opt, buf, h.scaler, r) + if err != nil { // never happens + panic(err) + } + return buf +} + +func (h *mutexTestHelper) r(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord { + return runtime.BlockProfileRecord{ + Count: count, + Cycles: cycles, + StackRecord: runtime.StackRecord{ + Stack0: s, + }, + } +} + +func (h *mutexTestHelper) mutate(nmutations int, fs []runtime.BlockProfileRecord) { + oneBlockCycles := fs[0].Cycles / fs[0].Count + for j := 0; j < nmutations; j++ { + idx := int(uint(h.rng.Int63())) % len(fs) + fs[idx].Count += 1 + fs[idx].Cycles += oneBlockCycles + } +} + +type heapTestHelper struct { + dp *pprof.DeltaHeapProfiler + opt *pprof.ProfileBuilderOptions + rate int64 + rng rand.Source +} + +func newHeapTestHelper() *heapTestHelper { + res := &heapTestHelper{ + dp: &pprof.DeltaHeapProfiler{}, + opt: &pprof.ProfileBuilderOptions{ + GenericsFrames: true, + LazyMapping: true, + }, + rng: rand.NewSource(239), + rate: int64(runtime.MemProfileRate), } return res } + +func (h *heapTestHelper) dump(r ...runtime.MemProfileRecord) *bytes.Buffer { + buf := bytes.NewBuffer(nil) + err := WriteHeapProto(h.dp, h.opt, buf, r, h.rate) + if err != nil { // never happens + panic(err) + } + return buf +} + +func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord { + return runtime.MemProfileRecord{ + AllocObjects: AllocObjects, + AllocBytes: AllocBytes, + FreeBytes: FreeBytes, + FreeObjects: FreeObjects, + Stack0: s, + } +} + +func (h *heapTestHelper) mutate(nmutations int, fs []runtime.MemProfileRecord) { + objSize := fs[0].AllocBytes / fs[0].AllocObjects + for j := 0; j < nmutations; j++ { + idx := int(uint(h.rng.Int63())) % len(fs) + fs[idx].AllocObjects += 1 + fs[idx].AllocBytes += objSize + fs[idx].FreeObjects += 1 + 
fs[idx].FreeBytes += objSize + } +} + +func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error { + stc := pprof.HeapProfileConfig(rate) + b := pprof.NewProfileBuilder(w, opt, stc) + return dp.WriteHeapProto(b, p, rate) +} + +func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error { + stc := pprof.MutexProfileConfig() + b := pprof.NewProfileBuilder(w, opt, stc) + return d.PrintCountCycleProfile(b, scaler, records) +} + +func dumpMemProfileRecords() []runtime.MemProfileRecord { + var p []runtime.MemProfileRecord + n, ok := runtime.MemProfile(nil, true) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to MemProfile. + p = make([]runtime.MemProfileRecord, n+50) + n, ok = runtime.MemProfile(p, true) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + return p +} + +type noopBuilder struct { +} + +func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 { + return nil +} + +func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) { + +} + +func (b *noopBuilder) Build() { + +} diff --git a/godeltaprof/go.mod b/godeltaprof/go.mod index 0fbc602..e36d42c 100644 --- a/godeltaprof/go.mod +++ b/godeltaprof/go.mod @@ -1,5 +1,5 @@ module github.com/grafana/pyroscope-go/godeltaprof -go 1.16 +go 1.18 -require github.com/klauspost/compress v1.17.3 +require github.com/klauspost/compress v1.17.8 diff --git a/godeltaprof/go.sum b/godeltaprof/go.sum index 0d8e8f5..734f917 100644 --- a/godeltaprof/go.sum +++ b/godeltaprof/go.sum @@ -1,2 +1,2 @@ -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= +github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go index 0392382..883d020 100644 --- a/godeltaprof/internal/pprof/delta_heap.go +++ b/godeltaprof/internal/pprof/delta_heap.go @@ -6,16 +6,27 @@ import ( "strings" ) +type heapPrevValue struct { + allocObjects int64 +} + +type heapAccValue struct { + allocObjects int64 + inuseObjects int64 +} + type DeltaHeapProfiler struct { - m profMap + m profMap[heapPrevValue, heapAccValue] + //todo consider adding an option to remove block size label and merge allocations of different size } // WriteHeapProto writes the current heap profile in protobuf format to w. 
func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProfileRecord, rate int64) error { values := []int64{0, 0, 0, 0} var locs []uint64 - for _, r := range p { - // do the delta + // deduplicate: accumulate allocObjects and inuseObjects in entry.acc for equal stacks + for i := range p { + r := &p[i] if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 { // it is a fresh bucket and it will be published after next 1-2 gc cycles continue @@ -25,17 +36,42 @@ func (d *DeltaHeapProfiler) WriteHeapProf blockSize = r.AllocBytes / r.AllocObjects } entry := d.m.Lookup(r.Stack(), uintptr(blockSize)) + entry.acc.allocObjects += r.AllocObjects + entry.acc.inuseObjects += r.InUseObjects() + } + // do the delta using the accumulated values and previous values + for i := range p { + r := &p[i] + if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 { + // it is a fresh bucket and it will be published after next 1-2 gc cycles + continue + } + var blockSize int64 + if r.AllocObjects > 0 { + blockSize = r.AllocBytes / r.AllocObjects + } + entry := d.m.Lookup(r.Stack(), uintptr(blockSize)) + if entry.acc == (heapAccValue{}) { + continue + } - if (r.AllocObjects - entry.count.v1) < 0 { + allocObjects := entry.acc.allocObjects - entry.prev.allocObjects + if allocObjects < 0 { continue } - AllocObjects := r.AllocObjects - entry.count.v1 - AllocBytes := r.AllocBytes - entry.count.v2 - entry.count.v1 = r.AllocObjects - entry.count.v2 = r.AllocBytes - values[0], values[1] = scaleHeapSample(AllocObjects, AllocBytes, rate) - values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) + // allocBytes and inuseBytes are calculated by multiplying the number of objects by blockSize. + // This is done to reduce the size of the map entry (i.e. heapAccValue for deduplication and + // heapPrevValue for keeping the delta). + + allocBytes := allocObjects * blockSize + entry.prev.allocObjects = entry.acc.allocObjects + inuseBytes := entry.acc.inuseObjects * blockSize + + values[0], values[1] = ScaleHeapSample(allocObjects, allocBytes, rate) + values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, inuseBytes, rate) + + entry.acc = heapAccValue{} if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 { continue @@ -70,7 +106,7 @@ func (d *DeltaHeapProfiler) WriteHeapProf return nil } -// scaleHeapSample adjusts the data from a heap Sample to +// ScaleHeapSample adjusts the data from a heap Sample to // account for its probability of appearing in the collected // data. heap profiles are a sampling of the memory allocations // requests in a program. We estimate the unsampled value by dividing @@ -79,7 +115,7 @@ func (d *DeltaHeapProfiler) WriteHeapProf // which samples to collect, based on the desired average collection // rate R. The probability of a sample of size S to appear in that // profile is 1-exp(-S/R). 
-func scaleHeapSample(count, size, rate int64) (int64, int64) { +func ScaleHeapSample(count, size, rate int64) (int64, int64) { if count == 0 || size == 0 { return 0, 0 } diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go index 0b2500a..5c177e3 100644 --- a/godeltaprof/internal/pprof/delta_mutex.go +++ b/godeltaprof/internal/pprof/delta_mutex.go @@ -4,8 +4,18 @@ import ( "runtime" ) +type mutexPrevValue struct { + count int64 + inanosec int64 +} + +type mutexAccValue struct { + count int64 + cycles int64 +} + type DeltaMutexProfiler struct { - m profMap + m profMap[mutexPrevValue, mutexAccValue] } // PrintCountCycleProfile outputs block profile records (for block or mutex profiles) @@ -19,16 +29,33 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut values := []int64{0, 0} var locs []uint64 - for _, r := range records { - count, nanosec := ScaleMutexProfile(scaler, r.Count, float64(r.Cycles)/cpuGHz) + // deduplicate: accumulate count and cycles in entry.acc for equal stacks + for i := range records { + r := &records[i] + entry := d.m.Lookup(r.Stack(), 0) + entry.acc.count += r.Count // accumulate unscaled + entry.acc.cycles += r.Cycles + } + + // do the delta using the accumulated values and previous values + for i := range records { + r := &records[i] + stk := r.Stack() + entry := d.m.Lookup(stk, 0) + accCount := entry.acc.count + accCycles := entry.acc.cycles + if accCount == 0 && accCycles == 0 { + continue + } + entry.acc = mutexAccValue{} + count, nanosec := ScaleMutexProfile(scaler, accCount, float64(accCycles)/cpuGHz) inanosec := int64(nanosec) // do the delta - entry := d.m.Lookup(r.Stack(), 0) - values[0] = count - entry.count.v1 - values[1] = inanosec - entry.count.v2 - entry.count.v1 = count - entry.count.v2 = inanosec + values[0] = count - entry.prev.count + values[1] = inanosec - entry.prev.inanosec + entry.prev.count = count + entry.prev.inanosec = inanosec if values[0] < 0 || values[1] < 0 { continue @@ -39,7 +66,7 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut // For count profiles, all stack addresses are // return PCs, which is what appendLocsForStack expects. - locs = b.LocsForStack(r.Stack()) + locs = b.LocsForStack(stk) b.Sample(values, locs, 0) } b.Build() diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go index 188001e..dcb6e56 100644 --- a/godeltaprof/internal/pprof/map.go +++ b/godeltaprof/internal/pprof/map.go @@ -8,30 +8,23 @@ import "unsafe" // A profMap is a map from (stack, tag) to mapEntry. // It grows without bound, but that's assumed to be OK. -type profMap struct { - hash map[uintptr]*profMapEntry - all *profMapEntry - last *profMapEntry - free []profMapEntry +type profMap[PREV any, ACC any] struct { + hash map[uintptr]*profMapEntry[PREV, ACC] + free []profMapEntry[PREV, ACC] freeStk []uintptr } -type count struct { - // alloc_objects, alloc_bytes for heap - // mutex_count, mutex_duration for mutex - v1, v2 int64 -} - // A profMapEntry is a single entry in the profMap. -type profMapEntry struct { - nextHash *profMapEntry // next in hash list - nextAll *profMapEntry // next in list of all entries +// todo use unsafe.Pointer + len for stk ? 
+type profMapEntry[PREV any, ACC any] struct { + nextHash *profMapEntry[PREV, ACC] // next in hash list stk []uintptr tag uintptr - count count + prev PREV + acc ACC } -func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry { +func (m *profMap[PREV, ACC]) Lookup(stk []uintptr, tag uintptr) *profMapEntry[PREV, ACC] { // Compute hash of (stk, tag). h := uintptr(0) for _, x := range stk { @@ -42,7 +35,7 @@ func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry { h += uintptr(tag) * 41 // Find entry if present. - var last *profMapEntry + var last *profMapEntry[PREV, ACC] Search: for e := m.hash[h]; e != nil; last, e = e, e.nextHash { if len(e.stk) != len(stk) || e.tag != tag { @@ -64,7 +57,7 @@ Search: // Add new entry. if len(m.free) < 1 { - m.free = make([]profMapEntry, 128) + m.free = make([]profMapEntry[PREV, ACC], 128) } e := &m.free[0] m.free = m.free[1:] @@ -82,15 +75,8 @@ Search: e.stk[j] = uintptr(stk[j]) } if m.hash == nil { - m.hash = make(map[uintptr]*profMapEntry) + m.hash = make(map[uintptr]*profMapEntry[PREV, ACC]) } m.hash[h] = e - if m.all == nil { - m.all = e - m.last = e - } else { - m.last.nextAll = e - m.last = e - } return e } diff --git a/godeltaprof/internal/pprof/stub_go23.go b/godeltaprof/internal/pprof/stub_go23.go index 37ccbe8..d587cad 100644 --- a/godeltaprof/internal/pprof/stub_go23.go +++ b/godeltaprof/internal/pprof/stub_go23.go @@ -23,5 +23,5 @@ func runtime_FrameSymbolName(f *runtime.Frame) string //go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr -//go:linkname runtime_cyclesPerSecond runtime.pprof_cyclesPerSecond +//go:linkname runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func runtime_cyclesPerSecond() int64
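For reference, the sampling math described in the ScaleHeapSample comment can be checked with a small self-contained program. The function body below mirrors the upstream runtime/pprof implementation (the full body is not quoted in this diff's context lines), so treat it as a reference sketch rather than part of the patch:

    package main

    import (
    	"fmt"
    	"math"
    )

    // scaleHeapSample estimates unsampled heap values: a sample whose average
    // block size is S = size/count is collected at rate R with probability
    // 1-exp(-S/R), so dividing the observed values by that probability
    // recovers an estimate of the true totals.
    func scaleHeapSample(count, size, rate int64) (int64, int64) {
    	if count == 0 || size == 0 {
    		return 0, 0
    	}
    	if rate <= 1 {
    		// rate==1 means all samples were collected; nothing to adjust.
    		return count, size
    	}
    	avgSize := float64(size) / float64(count)
    	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
    	return int64(float64(count) * scale), int64(float64(size) * scale)
    }

    func main() {
    	// The constants from TestDeltaHeap: 5 sampled objects of 327680 bytes
    	// at rate 524288 scale up by 1/(1-exp(-0.625)) ~= 2.15.
    	fmt.Println(scaleHeapSample(5, 5*327680, 524288))
    }

Running it should print 10 3525422, which is where the values asserted in TestDeltaHeap and TestHeapDuplicates come from.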