From 65a479df7b80d23b0c26dd17a16c461a3bd2b63a Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 13:07:43 +0700
Subject: [PATCH 01/33] make tests a bit more reusable

---
 godeltaprof/compat/compression_test.go  |  56 ++------
 godeltaprof/compat/delta_test.go        | 153 ++++++---------------
 godeltaprof/compat/reject_order_test.go |  40 +++---
 godeltaprof/compat/testdata.go          | 175 +++++++++++++++++++-----
 4 files changed, 204 insertions(+), 220 deletions(-)

diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go
index e9305e4..ceeb4b7 100644
--- a/godeltaprof/compat/compression_test.go
+++ b/godeltaprof/compat/compression_test.go
@@ -2,33 +2,17 @@ package compat
 
 import (
 	"io"
-	"math/rand"
 	"runtime"
 	"testing"
-
-	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
 )
 
 func BenchmarkHeapCompression(b *testing.B) {
-	opt := &pprof.ProfileBuilderOptions{
-		GenericsFrames: true,
-		LazyMapping:    true,
-	}
-	dh := new(pprof.DeltaHeapProfiler)
-	fs := generateMemProfileRecords(512, 32, 239)
-	rng := rand.NewSource(239)
-	objSize := fs[0].AllocBytes / fs[0].AllocObjects
-	nMutations := int(uint(rng.Int63())) % len(fs)
+	h := newHeapTestHelper()
+	fs := h.generateMemProfileRecords(512, 32)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		_ = WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate))
-		for j := 0; j < nMutations; j++ {
-			idx := int(uint(rng.Int63())) % len(fs)
-			fs[idx].AllocObjects += 1
-			fs[idx].AllocBytes += objSize
-			fs[idx].FreeObjects += 1
-			fs[idx].FreeBytes += objSize
-		}
+		_ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate))
+		h.mutate(fs)
 	}
 }
 
@@ -43,38 +27,16 @@ func BenchmarkMutexCompression(b *testing.B) {
 			runtime.SetMutexProfileFraction(5)
 			defer runtime.SetMutexProfileFraction(prevMutexProfileFraction)
 
-			opt := &pprof.ProfileBuilderOptions{
-				GenericsFrames: true,
-				LazyMapping:    true,
-			}
-			dh := new(pprof.DeltaMutexProfiler)
-			fs := generateBlockProfileRecords(512, 32, 239)
-			rng := rand.NewSource(239)
-			nMutations := int(uint(rng.Int63())) % len(fs)
-			oneBlockCycles := fs[0].Cycles / fs[0].Count
+			h := newMutexTestHelper()
+			h.scaler = scaler
+			fs := h.generateBlockProfileRecords(512, 32)
 			b.ResetTimer()
 
 			for i := 0; i < b.N; i++ {
-				_ = PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs)
-				for j := 0; j < nMutations; j++ {
-					idx := int(uint(rng.Int63())) % len(fs)
-					fs[idx].Count += 1
-					fs[idx].Cycles += oneBlockCycles
-				}
+				_ = PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs)
+				h.mutate(fs)
 			}
 		})
 
 	}
 }
-
-func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error {
-	stc := pprof.HeapProfileConfig(rate)
-	b := pprof.NewProfileBuilder(w, opt, stc)
-	return dp.WriteHeapProto(b, p, rate)
-}
-
-func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error {
-	stc := pprof.MutexProfileConfig()
-	b := pprof.NewProfileBuilder(w, opt, stc)
-	return d.PrintCountCycleProfile(b, scaler, records)
-}
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 1dded69..a528c6d 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -1,13 +1,8 @@
 package compat
 
 import (
-	"bytes"
-	"math/rand"
 	"runtime"
 	"testing"
-
-	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
-	"github.com/stretchr/testify/assert"
 )
 
 var (
@@ -40,56 +35,40 @@ func TestDeltaHeap(t *testing.T) {
 	const testMemProfileRate = 524288
 	const testObjectSize = 327680
 
-	dh := new(pprof.DeltaHeapProfiler)
-	opt := new(pprof.ProfileBuilderOptions)
-	dump := func(r ...runtime.MemProfileRecord) *bytes.Buffer {
-		buf := bytes.NewBuffer(nil)
-		err := WriteHeapProto(dh, opt, buf, r, testMemProfileRate)
-		assert.NoError(t, err)
-		return buf
-	}
-	r := func(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord {
-		return runtime.MemProfileRecord{
-			AllocObjects: AllocObjects,
-			AllocBytes:   AllocBytes,
-			FreeBytes:    FreeBytes,
-			FreeObjects:  FreeObjects,
-			Stack0:       s,
-		}
-	}
+	h := newHeapTestHelper()
 
-	p1 := dump(
-		r(0, 0, 0, 0, stack0),
-		r(0, 0, 0, 0, stack1),
+	p1 := h.dump(
+		h.r(0, 0, 0, 0, stack0),
+		h.r(0, 0, 0, 0, stack1),
 	)
 	expectEmptyProfile(t, p1)
 
-	p2 := dump(
-		r(5, 5*testObjectSize, 0, 0, stack0),
-		r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
+	p2 := h.dump(
+		h.r(5, 5*testObjectSize, 0, 0, stack0),
+		h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
 	)
 	expectStackFrames(t, p2, stack0Marker, 10, 3525422, 10, 3525422)
 	expectStackFrames(t, p2, stack1Marker, 6, 2115253, 0, 0)
 
 	for i := 0; i < 3; i++ {
 		// if we write same data, stack0 is in use, stack1 should not be present
-		p3 := dump(
-			r(5, 5*testObjectSize, 0, 0, stack0),
-			r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
+		p3 := h.dump(
+			h.r(5, 5*testObjectSize, 0, 0, stack0),
+			h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
 		)
 		expectStackFrames(t, p3, stack0Marker, 0, 0, 10, 3525422)
 		expectNoStackFrames(t, p3, stack1Marker)
 	}
 
-	p4 := dump(
-		r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0),
-		r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
+	p4 := h.dump(
+		h.r(5, 5*testObjectSize, 5, 5*testObjectSize, stack0),
+		h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
 	)
 	expectEmptyProfile(t, p4)
 
-	p5 := dump(
-		r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0),
-		r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
+	p5 := h.dump(
+		h.r(8, 8*testObjectSize, 5, 5*testObjectSize, stack0),
+		h.r(3, 3*testObjectSize, 3, 3*testObjectSize, stack1),
 	)
 	// note, this value depends on scale order, it currently tests the current implementation, but we may change it
 	// to alloc objects to be scale(8) - scale(5) = 17-10 = 7
@@ -98,8 +77,6 @@ func TestDeltaHeap(t *testing.T) {
 }
 
 func TestDeltaBlockProfile(t *testing.T) {
-	cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9
-
 	for i, scaler := range mutexProfileScalers {
 		name := "ScalerMutexProfile"
 		if i == 1 {
@@ -110,56 +87,35 @@ func TestDeltaBlockProfile(t *testing.T) {
 			runtime.SetMutexProfileFraction(5)
 			defer runtime.SetMutexProfileFraction(prevMutexProfileFraction)
 
-			dh := new(pprof.DeltaMutexProfiler)
-			opt := new(pprof.ProfileBuilderOptions)
+			h := newMutexTestHelper()
+			h.scaler = scaler
 
-			scale := func(rcount, rcycles int64) (int64, int64) {
-				count, nanosec := pprof.ScaleMutexProfile(scaler, rcount, float64(rcycles)/cpuGHz)
-				inanosec := int64(nanosec)
-				return count, inanosec
-			}
-			dump := func(r ...runtime.BlockProfileRecord) *bytes.Buffer {
-				buf := bytes.NewBuffer(nil)
-				err := PrintCountCycleProfile(dh, opt, buf, scaler, r)
-				assert.NoError(t, err)
-				return buf
-			}
-			r := func(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord {
-				return runtime.BlockProfileRecord{
-					Count:  count,
-					Cycles: cycles,
-					StackRecord: runtime.StackRecord{
-						Stack0: s,
-					},
-				}
-			}
-
-			p1 := dump(
-				r(0, 0, stack0),
-				r(0, 0, stack1),
+			p1 := h.dump(
+				h.r(0, 0, stack0),
+				h.r(0, 0, stack1),
 			)
 			expectEmptyProfile(t, p1)
 
 			const cycles = 42
-			p2 := dump(
-				r(239, 239*cycles, stack0),
-				r(0, 0, stack1),
+			p2 := h.dump(
+				h.r(239, 239*cycles, stack0),
+				h.r(0, 0, stack1),
 			)
-			count0, nanos0 := scale(239, 239*cycles)
+			count0, nanos0 := h.scale(239, 239*cycles)
 			expectStackFrames(t, p2, stack0Marker, count0, nanos0)
 			expectNoStackFrames(t, p2, stack1Marker)
 
 			for j := 0; j < 2; j++ {
-				p3 := dump(
-					r(239, 239*cycles, stack0),
-					r(0, 0, stack1),
+				p3 := h.dump(
+					h.r(239, 239*cycles, stack0),
+					h.r(0, 0, stack1),
 				)
 				expectEmptyProfile(t, p3)
 			}
 
-			count1, nanos1 := scale(240, 240*cycles)
-			p4 := dump(
-				r(240, 240*cycles, stack0),
+			count1, nanos1 := h.scale(240, 240*cycles)
+			p4 := h.dump(
+				h.r(240, 240*cycles, stack0),
 			)
 			expectStackFrames(t, p4, stack0Marker, count1-count0, nanos1-nanos0)
 			expectNoStackFrames(t, p4, stack1Marker)
@@ -168,22 +124,13 @@ func TestDeltaBlockProfile(t *testing.T) {
 }
 
 func BenchmarkHeapDelta(b *testing.B) {
-	dh := new(pprof.DeltaHeapProfiler)
-	fs := generateMemProfileRecords(512, 32, 239)
-	rng := rand.NewSource(239)
-	objSize := fs[0].AllocBytes / fs[0].AllocObjects
-	nMutations := int(uint(rng.Int63())) % len(fs)
+	h := newHeapTestHelper()
+	fs := h.generateMemProfileRecords(512, 32)
 	builder := &noopBuilder{}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		_ = dh.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate))
-		for j := 0; j < nMutations; j++ {
-			idx := int(uint(rng.Int63())) % len(fs)
-			fs[idx].AllocObjects += 1
-			fs[idx].AllocBytes += objSize
-			fs[idx].FreeObjects += 1
-			fs[idx].FreeBytes += objSize
-		}
+		_ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate))
+		h.mutate(fs)
 	}
 }
 
@@ -198,37 +145,17 @@ func BenchmarkMutexDelta(b *testing.B) {
 			runtime.SetMutexProfileFraction(5)
 			defer runtime.SetMutexProfileFraction(prevMutexProfileFraction)
 
-			dh := new(pprof.DeltaMutexProfiler)
-			fs := generateBlockProfileRecords(512, 32, 239)
-			rng := rand.NewSource(239)
-			nMutations := int(uint(rng.Int63())) % len(fs)
-			oneBlockCycles := fs[0].Cycles / fs[0].Count
+			h := newMutexTestHelper()
+			h.scaler = scaler
+			fs := h.generateBlockProfileRecords(512, 32)
 			builder := &noopBuilder{}
 			b.ResetTimer()
 
 			for i := 0; i < b.N; i++ {
-				_ = dh.PrintCountCycleProfile(builder, scaler, fs)
-				for j := 0; j < nMutations; j++ {
-					idx := int(uint(rng.Int63())) % len(fs)
-					fs[idx].Count += 1
-					fs[idx].Cycles += oneBlockCycles
-				}
+				_ = h.dp.PrintCountCycleProfile(builder, scaler, fs)
+				h.mutate(fs)
 			}
 		})
 
 	}
 }
-
-type noopBuilder struct {
-}
-
-func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 {
-	return nil
-}
-func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) {
-
-}
-
-func (b *noopBuilder) Build() {
-
-}
diff --git a/godeltaprof/compat/reject_order_test.go b/godeltaprof/compat/reject_order_test.go
index 9bdd658..6266b78 100644
--- a/godeltaprof/compat/reject_order_test.go
+++ b/godeltaprof/compat/reject_order_test.go
@@ -13,11 +13,10 @@ import (
 )
 
 func TestHeapReject(t *testing.T) {
-	dh := new(pprof.DeltaHeapProfiler)
-	opt := new(pprof.ProfileBuilderOptions)
-	fs := generateMemProfileRecords(512, 32, 239)
+	h := newHeapTestHelper()
+	fs := h.generateMemProfileRecords(512, 32)
 	p1 := bytes.NewBuffer(nil)
-	err := WriteHeapProto(dh, opt, p1, fs, int64(runtime.MemProfileRate))
+	err := WriteHeapProto(h.dp, h.opt, p1, fs, int64(runtime.MemProfileRate))
 	assert.NoError(t, err)
 	p1Size := p1.Len()
 	profile, err := gprofile.Parse(p1)
@@ -28,7 +27,7 @@ func TestHeapReject(t *testing.T) {
 	t.Log("p1 size", p1Size)
 
 	p2 := bytes.NewBuffer(nil)
-	err = WriteHeapProto(dh, opt, p2, fs, int64(runtime.MemProfileRate))
+	err = WriteHeapProto(h.dp, h.opt, p2, fs, int64(runtime.MemProfileRate))
 	assert.NoError(t, err)
 	p2Size := p2.Len()
 	assert.Less(t, p2Size, 1000)
@@ -41,15 +40,11 @@ func TestHeapReject(t *testing.T) {
 }
 
 func BenchmarkHeapRejectOrder(b *testing.B) {
-	opt := &pprof.ProfileBuilderOptions{
-		GenericsFrames: false,
-		LazyMapping:    true,
-	}
-	dh := &pprof.DeltaHeapProfiler{}
-	fs := generateMemProfileRecords(512, 32, 239)
+	h := newHeapTestHelper()
+	fs := h.generateMemProfileRecords(512, 32)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		WriteHeapProto(dh, opt, io.Discard, fs, int64(runtime.MemProfileRate))
+		WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate))
 	}
 }
 
@@ -69,11 +64,11 @@ func TestMutexReject(t *testing.T) {
 			runtime.SetMutexProfileFraction(5)
 			defer runtime.SetMutexProfileFraction(prevMutexProfileFraction)
 
-			dh := new(pprof.DeltaMutexProfiler)
-			opt := new(pprof.ProfileBuilderOptions)
-			fs := generateBlockProfileRecords(512, 32, 239)
+			h := newMutexTestHelper()
+			h.scaler = scaler
+			fs := h.generateBlockProfileRecords(512, 32)
 			p1 := bytes.NewBuffer(nil)
-			err := PrintCountCycleProfile(dh, opt, p1, scaler, fs)
+			err := PrintCountCycleProfile(h.dp, h.opt, p1, scaler, fs)
 			assert.NoError(t, err)
 			p1Size := p1.Len()
 			profile, err := gprofile.Parse(p1)
@@ -84,7 +79,7 @@ func TestMutexReject(t *testing.T) {
 			t.Log("p1 size", p1Size)
 
 			p2 := bytes.NewBuffer(nil)
-			err = PrintCountCycleProfile(dh, opt, p2, scaler, fs)
+			err = PrintCountCycleProfile(h.dp, h.opt, p2, scaler, fs)
 			assert.NoError(t, err)
 			p2Size := p2.Len()
 			assert.Less(t, p2Size, 1000)
@@ -108,16 +103,13 @@ func BenchmarkMutexRejectOrder(b *testing.B) {
 			prevMutexProfileFraction := runtime.SetMutexProfileFraction(-1)
 			runtime.SetMutexProfileFraction(5)
 			defer runtime.SetMutexProfileFraction(prevMutexProfileFraction)
-			opt := &pprof.ProfileBuilderOptions{
-				GenericsFrames: false,
-				LazyMapping:    true,
-			}
-			dh := &pprof.DeltaMutexProfiler{}
-			fs := generateBlockProfileRecords(512, 32, 239)
+			h := newMutexTestHelper()
+			h.scaler = scaler
+			fs := h.generateBlockProfileRecords(512, 32)
 			b.ResetTimer()
 
 			for i := 0; i < b.N; i++ {
-				PrintCountCycleProfile(dh, opt, io.Discard, scaler, fs)
+				PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs)
 			}
 		})
 
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index 509ba47..bf0c355 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -1,16 +1,13 @@
 package compat
 
 import (
-	"go/types"
+	"bytes"
+	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
+	"github.com/stretchr/testify/assert"
+	"io"
 	"math/rand"
 	"reflect"
 	"runtime"
-	"strings"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"golang.org/x/tools/go/packages"
 )
 
 func getFunctionPointers() []uintptr {
@@ -160,12 +157,12 @@ func getFunctionPointers() []uintptr {
 	}
 }
 
-func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord {
+func (h *heapTestHelper) generateMemProfileRecords(n, depth int) []runtime.MemProfileRecord {
 	var records []runtime.MemProfileRecord
-	rng := rand.NewSource(int64(seed))
+
 	fs := getFunctionPointers()
 	for i := 0; i < n; i++ {
-		nobj := int(uint64(rng.Int63())) % 1000000
+		nobj := int(uint64(h.rng.Int63())) % 1000000
 		r := runtime.MemProfileRecord{
 			AllocObjects: int64(nobj),
 			AllocBytes:   int64(nobj * 1024),
@@ -173,53 +170,159 @@ func generateMemProfileRecords(n, depth, seed int) []runtime.MemProfileRecord {
 			FreeBytes:    int64(nobj * 1024),
 		}
 		for j := 0; j < depth; j++ {
-			r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)]
+			r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)]
 		}
 		records = append(records, r)
 	}
 	return records
 }
 
-func generateBlockProfileRecords(n, depth, seed int) []runtime.BlockProfileRecord {
+func (h *mutexTestHelper) generateBlockProfileRecords(n, depth int) []runtime.BlockProfileRecord {
 	var records []runtime.BlockProfileRecord
-	rng := rand.NewSource(int64(seed))
 	fs := getFunctionPointers()
 	for i := 0; i < n; i++ {
-		nobj := int(uint64(rng.Int63())) % 1000000
+		nobj := int(uint64(h.rng.Int63())) % 1000000
 		r := runtime.BlockProfileRecord{
 			Count:  int64(nobj),
 			Cycles: int64(nobj * 10),
 		}
 		for j := 0; j < depth; j++ {
-			r.Stack0[j] = fs[int(uint64(rng.Int63()))%len(fs)]
+			r.Stack0[j] = fs[int(uint64(h.rng.Int63()))%len(fs)]
 		}
 		records = append(records, r)
 	}
 	return records
 }
 
-func getFunctions(t testing.TB, pkg string) []*types.Func {
-	var res []*types.Func
-	cfg := &packages.Config{
-		Mode:  packages.NeedImports | packages.NeedExportFile | packages.NeedTypes | packages.NeedSyntax,
-		Tests: true,
+type mutexTestHelper struct {
+	dp     *pprof.DeltaMutexProfiler
+	opt    *pprof.ProfileBuilderOptions
+	scaler pprof.MutexProfileScaler
+	rng    rand.Source
+}
+
+func newMutexTestHelper() *mutexTestHelper {
+	res := &mutexTestHelper{
+		dp: &pprof.DeltaMutexProfiler{},
+		opt: &pprof.ProfileBuilderOptions{
+			GenericsFrames: true,
+			LazyMapping:    true,
+		},
+		scaler: pprof.ScalerMutexProfile,
+		rng:    rand.NewSource(239),
 	}
-	pkgs, err := packages.Load(cfg, pkg)
-	require.NoError(t, err)
-	for _, p := range pkgs {
-		if strings.Contains(p.ID, ".test") {
-			continue
-		}
-		for _, name := range p.Types.Scope().Names() {
-			f := p.Types.Scope().Lookup(name)
-
-			if f != nil {
-				ff, ok := f.(*types.Func)
-				if ok {
-					res = append(res, ff)
-				}
-			}
-		}
+	return res
+
+}
+
+func (h *mutexTestHelper) scale(rcount, rcycles int64) (int64, int64) {
+	cpuGHz := float64(pprof.Runtime_cyclesPerSecond()) / 1e9
+	count, nanosec := pprof.ScaleMutexProfile(h.scaler, rcount, float64(rcycles)/cpuGHz)
+	inanosec := int64(nanosec)
+	return count, inanosec
+}
+func (h *mutexTestHelper) dump(r ...runtime.BlockProfileRecord) *bytes.Buffer {
+	buf := bytes.NewBuffer(nil)
+	err := PrintCountCycleProfile(h.dp, h.opt, buf, h.scaler, r)
+	if err != nil { // never happens
+		panic(err)
+	}
+	return buf
+}
+
+func (h *mutexTestHelper) r(count, cycles int64, s [32]uintptr) runtime.BlockProfileRecord {
+	return runtime.BlockProfileRecord{
+		Count:  count,
+		Cycles: cycles,
+		StackRecord: runtime.StackRecord{
+			Stack0: s,
+		},
+	}
+}
+
+func (h *mutexTestHelper) mutate(fs []runtime.BlockProfileRecord) {
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
+	oneBlockCycles := fs[0].Cycles / fs[0].Count
+	for j := 0; j < nmutations; j++ {
+		idx := int(uint(h.rng.Int63())) % len(fs)
+		fs[idx].Count += 1
+		fs[idx].Cycles += oneBlockCycles
+	}
+}
+
+type heapTestHelper struct {
+	dp   *pprof.DeltaHeapProfiler
+	opt  *pprof.ProfileBuilderOptions
+	rate int64
+	rng  rand.Source
+}
+
+func newHeapTestHelper() *heapTestHelper {
+	res := &heapTestHelper{
+		dp: &pprof.DeltaHeapProfiler{},
+		opt: &pprof.ProfileBuilderOptions{
+			GenericsFrames: true,
+			LazyMapping:    true,
+		},
+		rng:  rand.NewSource(239),
+		rate: int64(runtime.MemProfileRate),
 	}
 	return res
 }
+
+func (h *heapTestHelper) dump(r ...runtime.MemProfileRecord) *bytes.Buffer {
+	buf := bytes.NewBuffer(nil)
+	err := WriteHeapProto(h.dp, h.opt, buf, r, h.rate)
+	if err != nil { // never happens
+		panic(err)
+	}
+	return buf
+}
+
+func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int64, s [32]uintptr) runtime.MemProfileRecord {
+	return runtime.MemProfileRecord{
+		AllocObjects: AllocObjects,
+		AllocBytes:   AllocBytes,
+		FreeBytes:    FreeBytes,
+		FreeObjects:  FreeObjects,
+		Stack0:       s,
+	}
+}
+
+func (h *heapTestHelper) mutate(fs []runtime.MemProfileRecord) {
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
+	objSize := fs[0].AllocBytes / fs[0].AllocObjects
+	for j := 0; j < nmutations; j++ {
+		idx := int(uint(h.rng.Int63())) % len(fs)
+		fs[idx].AllocObjects += 1
+		fs[idx].AllocBytes += objSize
+		fs[idx].FreeObjects += 1
+		fs[idx].FreeBytes += objSize
+	}
+}
+
+func WriteHeapProto(dp *pprof.DeltaHeapProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, p []runtime.MemProfileRecord, rate int64) error {
+	stc := pprof.HeapProfileConfig(rate)
+	b := pprof.NewProfileBuilder(w, opt, stc)
+	return dp.WriteHeapProto(b, p, rate)
+}
+
+func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuilderOptions, w io.Writer, scaler pprof.MutexProfileScaler, records []runtime.BlockProfileRecord) error {
+	stc := pprof.MutexProfileConfig()
+	b := pprof.NewProfileBuilder(w, opt, stc)
+	return d.PrintCountCycleProfile(b, scaler, records)
+}
+
+type noopBuilder struct {
+}
+
+func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 {
+	return nil
+}
+func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) {
+
+}
+
+func (b *noopBuilder) Build() {
+
+}

From f6edf8eb6e7ad6e595fc3aad714e55a1d18e833c Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 15:50:17 +0700
Subject: [PATCH 02/33] add more tests for allocations

---
 godeltaprof/compat/delta_test.go         | 156 +++++++++++++++++++++++
 godeltaprof/compat/stackcollapse.go      |  80 +++++++++---
 godeltaprof/compat/testdata.go           |  19 +++
 godeltaprof/internal/pprof/delta_heap.go |   9 +-
 4 files changed, 244 insertions(+), 20 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index a528c6d..d7aad9c 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -1,6 +1,13 @@
 package compat
 
 import (
+	"bytes"
+	"fmt"
+	gprofile "github.com/google/pprof/profile"
+	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"reflect"
 	"runtime"
 	"testing"
 )
@@ -36,6 +43,7 @@ func TestDeltaHeap(t *testing.T) {
 	const testObjectSize = 327680
 
 	h := newHeapTestHelper()
+	h.rate = testMemProfileRate
 
 	p1 := h.dump(
 		h.r(0, 0, 0, 0, stack0),
@@ -159,3 +167,151 @@ func BenchmarkMutexDelta(b *testing.B) {
 
 	}
 }
+
+func TestMutexDuplicates(t *testing.T) {
+	h := newMutexTestHelper()
+	const cycles = 42
+	p := h.dump(
+		h.r(239, 239*cycles, stack0),
+		h.r(42, 42*cycles, stack1),
+		h.r(7, 7*cycles, stack0),
+	)
+	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
+	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
+
+	p = h.dump(
+		h.r(239, 239*cycles, stack0),
+		h.r(42, 42*cycles, stack1),
+		h.r(7, 7*cycles, stack0),
+	)
+	expectEmptyProfile(t, p)
+}
+
+func TestHeapDuplicates(t *testing.T) {
+	const testMemProfileRate = 524288
+	h := newHeapTestHelper()
+	h.rate = testMemProfileRate
+	const blockSize = 1024
+	p := h.dump(
+		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+	)
+	c1, b1 := pprof.ScaleHeapSample(239+7, (239+7)*blockSize, testMemProfileRate)
+	expectStackFrames(t, p, stack0Marker, c1, b1, 0, 0)
+	c2, b2 := pprof.ScaleHeapSample(42, 42*blockSize, testMemProfileRate)
+	expectStackFrames(t, p, stack1Marker, c2, b2, 0, 0)
+
+	p = h.dump(
+		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+	)
+	expectEmptyProfile(t, p)
+}
+
+func TestChanAllocDup(t *testing.T) {
+	prevRate := runtime.MemProfileRate
+	runtime.MemProfileRate = 1
+	defer func() {
+		runtime.MemProfileRate = prevRate
+	}()
+	h := newHeapTestHelper()
+	h.rate = int64(runtime.MemProfileRate)
+
+	tests := []int{0, 1024}
+	profiles := []*bytes.Buffer{
+		bytes.NewBuffer(nil),
+		bytes.NewBuffer(nil),
+	}
+	f := func(i int) {
+		for _, test := range tests {
+			_ = make(chan int, test)
+			_ = make(chan structWithPointers, test) // with pointers
+		}
+		runtime.GC()
+		runtime.GC()
+		p := profiles[i]
+		_ = WriteHeapProto(h.dp, h.opt, p, dumpMemProfileRecords(), h.rate)
+	}
+	for i := 0; i < 2; i++ {
+		f(i)
+	}
+	runtime.MemProfileRate = prevRate
+
+	for _, p := range profiles {
+		printProfile(t, p)
+		// different types of allocations depending on the capacity of the channel and whether the channel type has pointers
+		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
+			1,
+			1, 18432, 0, 0)
+		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
+			3,
+			1, 96, 0, 0)
+		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
+			1,
+			1, 9472, 0, 0)
+	}
+}
+
+type structWithPointers struct {
+	t1 *testing.T
+	t2 *testing.T
+}
+
+// todo we should merge these allocations with an option
+func TestMapAlloc(t *testing.T) {
+	prevRate := runtime.MemProfileRate
+	runtime.MemProfileRate = 1
+	defer func() {
+		runtime.MemProfileRate = prevRate
+	}()
+	h := newHeapTestHelper()
+	h.rate = int64(runtime.MemProfileRate)
+
+	profiles := []*bytes.Buffer{
+		bytes.NewBuffer(nil),
+		bytes.NewBuffer(nil),
+	}
+	f := func(i int) {
+		mm := make(map[string]structWithPointers)
+		for i := 0; i < 1024; i++ {
+			k := fmt.Sprintf("k_____%d", i)
+			mm[k] = structWithPointers{t, t}
+		}
+		runtime.GC()
+		runtime.GC()
+		p := profiles[i]
+		_ = WriteHeapProto(h.dp, h.opt, p, dumpMemProfileRecords(), h.rate)
+	}
+	for i := 0; i < 2; i++ {
+		f(i)
+	}
+
+	runtime.MemProfileRate = prevRate
+
+	p0, err := gprofile.ParseData(profiles[0].Bytes())
+	require.NoError(t, err)
+	p1, err := gprofile.ParseData(profiles[0].Bytes())
+	require.NoError(t, err)
+
+	p0samples := grepSamples(p0, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
+	printProfile(t, profiles[0])
+	for _, sample := range p0samples {
+		p0Count := 0
+		for _, p0sample := range p0samples {
+			if reflect.DeepEqual(sample.Value, p0sample.Value) {
+				p0Count++
+			}
+		}
+		p1Count := 0
+		for _, p1sample := range p1.Sample {
+			if reflect.DeepEqual(sample.Value, p1sample.Value) {
+				p1Count++
+			}
+		}
+		assert.Greater(t, p0Count, 0)
+		assert.Equal(t, p0Count, p1Count)
+	}
+	assert.Greater(t, len(p0samples), 10)
+}
diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index 7051ab5..e5a9e7d 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -3,6 +3,7 @@ package compat
 import (
 	"bytes"
 	"io"
+	"reflect"
 	"regexp"
 	"sort"
 	"strings"
@@ -26,6 +27,16 @@ func expectEmptyProfile(t *testing.T, buffer io.Reader) {
 	assert.Empty(t, ls)
 }
 
+func printProfile(t *testing.T, p *bytes.Buffer) {
+	profile, err := gprofile.Parse(bytes.NewBuffer(p.Bytes()))
+	require.NoError(t, err)
+	t.Log("==================")
+	for _, sample := range profile.Sample {
+		s := pprofSampleToString(sample)
+		t.Logf("%v %v %v\n", s, sample.Value, sample.NumLabel)
+	}
+}
+
 func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) {
 	profile, err := gprofile.ParseData(buffer.Bytes())
 	require.NoError(t, err)
@@ -45,6 +56,32 @@ func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, val
 	}
 }
 
+func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern string, expectedCount int, expectedValues ...int64) {
+	profile, err := gprofile.ParseData(buffer.Bytes())
+	require.NoError(t, err)
+	cnt := 0
+	samples := grepSamples(profile, samplePattern)
+	for i := range samples {
+		if reflect.DeepEqual(profile.Sample[i].Value, expectedValues) {
+			cnt++
+		}
+	}
+	assert.Equal(t, expectedCount, cnt)
+}
+
+func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sample {
+	rr := regexp.MustCompile(samplePattern)
+	var samples []*gprofile.Sample
+	for i := range profile.Sample {
+		ss := pprofSampleToString(profile.Sample[i])
+		if !rr.MatchString(ss) {
+			continue
+		}
+		samples = append(samples, profile.Sample[i])
+	}
+	return samples
+}
+
 func findStack(t *testing.T, res []stack, re string) *stack {
 	rr := regexp.MustCompile(re)
 	for i, re := range res {
@@ -58,23 +95,9 @@ func findStack(t *testing.T, res []stack, re string) *stack {
 func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack {
 	var ret []stack
 	for _, s := range p.Sample {
-		var funcs []string
-		for i := range s.Location {
-
-			loc := s.Location[i]
-			for _, line := range loc.Line {
-				f := line.Function
-				//funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line))
-				funcs = append(funcs, f.Name)
-			}
-		}
-		for i := 0; i < len(funcs)/2; i++ {
-			j := len(funcs) - i - 1
-			funcs[i], funcs[j] = funcs[j], funcs[i]
-		}
-
+		funcs, strSample := pprofSampleToStrings(s)
 		ret = append(ret, stack{
-			line:  strings.Join(funcs, ";"),
+			line:  strSample,
 			funcs: funcs,
 			value: s.Value,
 		})
@@ -105,3 +128,28 @@ func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack {
 
 	return unique
 }
+
+func pprofSampleToString(s *gprofile.Sample) string {
+	_, v := pprofSampleToStrings(s)
+	return v
+}
+
+func pprofSampleToStrings(s *gprofile.Sample) ([]string, string) {
+	var funcs []string
+	for i := range s.Location {
+
+		loc := s.Location[i]
+		for _, line := range loc.Line {
+			f := line.Function
+			//funcs = append(funcs, fmt.Sprintf("%s:%d", f.Name, line.Line))
+			funcs = append(funcs, f.Name)
+		}
+	}
+	for i := 0; i < len(funcs)/2; i++ {
+		j := len(funcs) - i - 1
+		funcs[i], funcs[j] = funcs[j], funcs[i]
+	}
+
+	strSample := strings.Join(funcs, ";")
+	return funcs, strSample
+}
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index bf0c355..1bfc1df 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -313,12 +313,31 @@ func PrintCountCycleProfile(d *pprof.DeltaMutexProfiler, opt *pprof.ProfileBuild
 	return d.PrintCountCycleProfile(b, scaler, records)
 }
 
+func dumpMemProfileRecords() []runtime.MemProfileRecord {
+	var p []runtime.MemProfileRecord
+	n, ok := runtime.MemProfile(nil, true)
+	for {
+		// Allocate room for a slightly bigger profile,
+		// in case a few more entries have been added
+		// since the call to MemProfile.
+		p = make([]runtime.MemProfileRecord, n+50)
+		n, ok = runtime.MemProfile(p, true)
+		if ok {
+			p = p[0:n]
+			break
+		}
+		// Profile grew; try again.
+	}
+	return p
+}
+
 type noopBuilder struct {
 }
 
 func (b *noopBuilder) LocsForStack(_ []uintptr) []uint64 {
 	return nil
 }
+
 func (b *noopBuilder) Sample(_ []int64, _ []uint64, _ int64) {
 
 }
diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 0392382..48a4af1 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -8,6 +8,7 @@ import (
 
 type DeltaHeapProfiler struct {
 	m profMap
+	//todo consider adding an option to remove block size label and merge allocations of different size
 }
 
 // WriteHeapProto writes the current heap profile in protobuf format to w.
@@ -34,8 +35,8 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		entry.count.v1 = r.AllocObjects
 		entry.count.v2 = r.AllocBytes
 
-		values[0], values[1] = scaleHeapSample(AllocObjects, AllocBytes, rate)
-		values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)
+		values[0], values[1] = ScaleHeapSample(AllocObjects, AllocBytes, rate)
+		values[2], values[3] = ScaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)
 
 		if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 {
 			continue
@@ -70,7 +71,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 	return nil
 }
 
-// scaleHeapSample adjusts the data from a heap Sample to
+// ScaleHeapSample adjusts the data from a heap Sample to
 // account for its probability of appearing in the collected
 // data. heap profiles are a sampling of the memory allocations
 // requests in a program. We estimate the unsampled value by dividing
@@ -79,7 +80,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 // which samples to collect, based on the desired average collection
 // rate R. The probability of a sample of size S to appear in that
 // profile is 1-exp(-S/R).
-func scaleHeapSample(count, size, rate int64) (int64, int64) {
+func ScaleHeapSample(count, size, rate int64) (int64, int64) {
 	if count == 0 || size == 0 {
 		return 0, 0
 	}

From 25a686e392e335ff0bf1674e1f04396cfc3396e2 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 15:51:29 +0700
Subject: [PATCH 03/33] comment

---
 godeltaprof/compat/delta_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index d7aad9c..0f386d5 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -242,6 +242,7 @@ func TestChanAllocDup(t *testing.T) {
 	for _, p := range profiles {
 		printProfile(t, p)
 		// different types of allocations depending on the capacity of the channel and whether the channel type has pointers
+		// this is fragile and relies on runtime internals
 		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
 			1,
 			1, 18432, 0, 0)

From aba8908a2c1626934590c9d06e2c815a6dd5d951 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 15:52:46 +0700
Subject: [PATCH 04/33] more logs

---
 godeltaprof/compat/delta_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 0f386d5..6b148ef 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -298,6 +298,7 @@ func TestMapAlloc(t *testing.T) {
 
 	p0samples := grepSamples(p0, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
 	printProfile(t, profiles[0])
+	printProfile(t, profiles[1])
 	for _, sample := range p0samples {
 		p0Count := 0
 		for _, p0sample := range p0samples {

From 6f40c508f6a07e357a4ef6a0747d93c729934ee4 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 15:59:28 +0700
Subject: [PATCH 05/33] more logs

---
 godeltaprof/compat/delta_test.go    |  5 +++--
 godeltaprof/compat/stackcollapse.go | 12 ++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6b148ef..394d970 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -312,8 +312,9 @@ func TestMapAlloc(t *testing.T) {
 				p1Count++
 			}
 		}
-		assert.Greater(t, p0Count, 0)
-		assert.Equal(t, p0Count, p1Count)
+		assert.Greaterf(t, p0Count, 0, "sample %v not found in p0", pprofSampleStackToString(sample))
+		assert.Greaterf(t, p1Count, 0, "sample %v not found in p1", pprofSampleStackToString(sample))
+		assert.Equalf(t, p0Count, p1Count, "sampel %v count mismatch %v", pprofSampleStackToString(sample), sample.Value)
 	}
 	assert.Greater(t, len(p0samples), 10)
 }
diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index e5a9e7d..4514754 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -32,7 +32,7 @@ func printProfile(t *testing.T, p *bytes.Buffer) {
 	require.NoError(t, err)
 	t.Log("==================")
 	for _, sample := range profile.Sample {
-		s := pprofSampleToString(sample)
+		s := pprofSampleStackToString(sample)
 		t.Logf("%v %v %v\n", s, sample.Value, sample.NumLabel)
 	}
 }
@@ -73,7 +73,7 @@ func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sa
 	rr := regexp.MustCompile(samplePattern)
 	var samples []*gprofile.Sample
 	for i := range profile.Sample {
-		ss := pprofSampleToString(profile.Sample[i])
+		ss := pprofSampleStackToString(profile.Sample[i])
 		if !rr.MatchString(ss) {
 			continue
 		}
@@ -95,7 +95,7 @@ func findStack(t *testing.T, res []stack, re string) *stack {
 func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack {
 	var ret []stack
 	for _, s := range p.Sample {
-		funcs, strSample := pprofSampleToStrings(s)
+		funcs, strSample := pprofSampleStackToStrings(s)
 		ret = append(ret, stack{
 			line:  strSample,
 			funcs: funcs,
@@ -129,12 +129,12 @@ func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack {
 	return unique
 }
 
-func pprofSampleToString(s *gprofile.Sample) string {
-	_, v := pprofSampleToStrings(s)
+func pprofSampleStackToString(s *gprofile.Sample) string {
+	_, v := pprofSampleStackToStrings(s)
 	return v
 }
 
-func pprofSampleToStrings(s *gprofile.Sample) ([]string, string) {
+func pprofSampleStackToStrings(s *gprofile.Sample) ([]string, string) {
 	var funcs []string
 	for i := range s.Location {
 

From 9b1889b9916eacfce5a81144f28d4b8b6a41cb2e Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 16:06:07 +0700
Subject: [PATCH 06/33] fix test

---
 godeltaprof/compat/delta_test.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 394d970..44fed8b 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -297,6 +297,7 @@ func TestMapAlloc(t *testing.T) {
 	require.NoError(t, err)
 
 	p0samples := grepSamples(p0, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
+	p1samples := grepSamples(p1, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
 	printProfile(t, profiles[0])
 	printProfile(t, profiles[1])
 	for _, sample := range p0samples {
@@ -307,7 +308,7 @@ func TestMapAlloc(t *testing.T) {
 			}
 		}
 		p1Count := 0
-		for _, p1sample := range p1.Sample {
+		for _, p1sample := range p1samples {
 			if reflect.DeepEqual(sample.Value, p1sample.Value) {
 				p1Count++
 			}

From d752721a25cfc6a8202eefaab6a4a7f8a7d862cc Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 16:12:09 +0700
Subject: [PATCH 07/33] more logs

---
 godeltaprof/compat/stackcollapse.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index 4514754..5e0275f 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -66,7 +66,7 @@ func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern stri
 			cnt++
 		}
 	}
-	assert.Equal(t, expectedCount, cnt)
+	assert.Equalf(t, expectedCount, cnt, "expected samples re: %s\n   values: %v\n    count:%d\n    all samples:%v\n", samplePattern, expectedValues, expectedCount, samples)
 }
 
 func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sample {

From 50f78aa6c1b9597a924388aefddecab853e41eb7 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 16:16:13 +0700
Subject: [PATCH 08/33] more logs

---
 godeltaprof/compat/stackcollapse.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index 5e0275f..59a0e2f 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -66,7 +66,7 @@ func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern stri
 			cnt++
 		}
 	}
-	assert.Equalf(t, expectedCount, cnt, "expected samples re: %s\n   values: %v\n    count:%d\n    all samples:%v\n", samplePattern, expectedValues, expectedCount, samples)
+	assert.Equalf(t, expectedCount, cnt, "expected samples re: %s\n   values: %v\n    count:%d\n    all samples:%+v\n", samplePattern, expectedValues, expectedCount, samples)
 }
 
 func grepSamples(profile *gprofile.Profile, samplePattern string) []*gprofile.Sample {

From 07915a5fdc34c70e9b069ea6f0a6433996aa773c Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 16:44:19 +0700
Subject: [PATCH 09/33] diff

---
 godeltaprof/compat/delta_test.go | 79 ++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 13 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 44fed8b..6a02d39 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -9,6 +9,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"reflect"
 	"runtime"
+	"slices"
 	"testing"
 )
 
@@ -210,6 +211,19 @@ func TestHeapDuplicates(t *testing.T) {
 	expectEmptyProfile(t, p)
 }
 
+type allocEntry struct {
+	pc     []uintptr
+	values []int64
+	sample *gprofile.Sample
+}
+
+func (e *allocEntry) String() string {
+	return fmt.Sprintf("%+v %+v ", pprofSampleStackToString(e.sample), e.values)
+}
+func (e *allocEntry) String2() string {
+	return fmt.Sprintf("%+v %+v", e.pc, e.values)
+}
+
 func TestChanAllocDup(t *testing.T) {
 	prevRate := runtime.MemProfileRate
 	runtime.MemProfileRate = 1
@@ -239,20 +253,59 @@ func TestChanAllocDup(t *testing.T) {
 	}
 	runtime.MemProfileRate = prevRate
 
-	for _, p := range profiles {
-		printProfile(t, p)
-		// different types of allocations depending on the capacity of the channel and whether the channel type has pointers
-		// this is fragile and relies on runtime internals
-		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
-			1,
-			1, 18432, 0, 0)
-		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
-			3,
-			1, 96, 0, 0)
-		expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
-			1,
-			1, 9472, 0, 0)
+	var entries [][]allocEntry
+	var strEntries [][]string
+	const pattern = "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$"
+
+	for _, profile := range profiles {
+		pp, err := gprofile.ParseData(profile.Bytes())
+		require.NoError(t, err)
+		samples := grepSamples(pp, pattern)
+		es := []allocEntry{}
+		for _, sample := range samples {
+			e := allocEntry{}
+			for _, location := range sample.Location {
+				e.pc = append(e.pc, uintptr(location.Address))
+			}
+			e.values = sample.Value
+			e.sample = sample
+			es = append(es, e)
+		}
+		entries = append(entries, es)
+	}
+	for i, es := range entries {
+		slices.SortFunc(es, func(i, j allocEntry) int {
+			pc := slices.Compare(i.pc, j.pc)
+			if pc != 0 {
+				return pc
+			}
+			return slices.Compare(i.values, j.values)
+		})
+		ss := []string{}
+		fmt.Printf("========================== %d\n", i)
+		for _, e := range es {
+			ss = append(ss, e.String())
+			fmt.Printf("%s\n", e.String())
+			fmt.Printf("%s\n", e.String2())
+		}
+		strEntries = append(strEntries, ss)
 	}
+
+	assert.Equal(t, strEntries[0], strEntries[1])
+	//for _, p := range profiles {
+	//	printProfile(t, p)
+	//	// different types of allocations depending on the capacity of the channel and whether the channel type has pointers
+	//	// this is fragile and relies on runtime internals
+	//	expectPPROFLocations(t, p, pattern,
+	//		1,
+	//		1, 18432, 0, 0)
+	//	expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
+	//		3,
+	//		1, 96, 0, 0)
+	//	expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
+	//		1,
+	//		1, 9472, 0, 0)
+	//}
 }
 
 type structWithPointers struct {

From 68de7a4a75965d6b4192cb016afc1e54381d0136 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 16:56:31 +0700
Subject: [PATCH 10/33] diff

---
 godeltaprof/compat/delta_test.go | 148 +++++++++++++------------------
 1 file changed, 61 insertions(+), 87 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6a02d39..35ae3da 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -4,10 +4,8 @@ import (
 	"bytes"
 	"fmt"
 	gprofile "github.com/google/pprof/profile"
-	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-	"reflect"
 	"runtime"
 	"slices"
 	"testing"
@@ -169,47 +167,47 @@ func BenchmarkMutexDelta(b *testing.B) {
 	}
 }
 
-func TestMutexDuplicates(t *testing.T) {
-	h := newMutexTestHelper()
-	const cycles = 42
-	p := h.dump(
-		h.r(239, 239*cycles, stack0),
-		h.r(42, 42*cycles, stack1),
-		h.r(7, 7*cycles, stack0),
-	)
-	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
-	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
-
-	p = h.dump(
-		h.r(239, 239*cycles, stack0),
-		h.r(42, 42*cycles, stack1),
-		h.r(7, 7*cycles, stack0),
-	)
-	expectEmptyProfile(t, p)
-}
-
-func TestHeapDuplicates(t *testing.T) {
-	const testMemProfileRate = 524288
-	h := newHeapTestHelper()
-	h.rate = testMemProfileRate
-	const blockSize = 1024
-	p := h.dump(
-		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
-		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
-		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
-	)
-	c1, b1 := pprof.ScaleHeapSample(239+7, (239+7)*blockSize, testMemProfileRate)
-	expectStackFrames(t, p, stack0Marker, c1, b1, 0, 0)
-	c2, b2 := pprof.ScaleHeapSample(42, 42*blockSize, testMemProfileRate)
-	expectStackFrames(t, p, stack1Marker, c2, b2, 0, 0)
-
-	p = h.dump(
-		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
-		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
-		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
-	)
-	expectEmptyProfile(t, p)
-}
+//func TestMutexDuplicates(t *testing.T) {
+//	h := newMutexTestHelper()
+//	const cycles = 42
+//	p := h.dump(
+//		h.r(239, 239*cycles, stack0),
+//		h.r(42, 42*cycles, stack1),
+//		h.r(7, 7*cycles, stack0),
+//	)
+//	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
+//	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
+//
+//	p = h.dump(
+//		h.r(239, 239*cycles, stack0),
+//		h.r(42, 42*cycles, stack1),
+//		h.r(7, 7*cycles, stack0),
+//	)
+//	expectEmptyProfile(t, p)
+//}
+//
+//func TestHeapDuplicates(t *testing.T) {
+//	const testMemProfileRate = 524288
+//	h := newHeapTestHelper()
+//	h.rate = testMemProfileRate
+//	const blockSize = 1024
+//	p := h.dump(
+//		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+//		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+//		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+//	)
+//	c1, b1 := pprof.ScaleHeapSample(239+7, (239+7)*blockSize, testMemProfileRate)
+//	expectStackFrames(t, p, stack0Marker, c1, b1, 0, 0)
+//	c2, b2 := pprof.ScaleHeapSample(42, 42*blockSize, testMemProfileRate)
+//	expectStackFrames(t, p, stack1Marker, c2, b2, 0, 0)
+//
+//	p = h.dump(
+//		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+//		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+//		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+//	)
+//	expectEmptyProfile(t, p)
+//}
 
 type allocEntry struct {
 	pc     []uintptr
@@ -218,7 +216,7 @@ type allocEntry struct {
 }
 
 func (e *allocEntry) String() string {
-	return fmt.Sprintf("%+v %+v ", pprofSampleStackToString(e.sample), e.values)
+	return fmt.Sprintf("%+v %+v %+v", pprofSampleStackToString(e.sample), e.values, e.sample.NumLabel)
 }
 func (e *allocEntry) String2() string {
 	return fmt.Sprintf("%+v %+v", e.pc, e.values)
@@ -253,16 +251,27 @@ func TestChanAllocDup(t *testing.T) {
 	}
 	runtime.MemProfileRate = prevRate
 
-	var entries [][]allocEntry
-	var strEntries [][]string
 	const pattern = "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$"
 
+	compareSamplesWithPattern(t, profiles[0], profiles[1], 5, pattern, func(s *gprofile.Sample) bool {
+		return true
+	})
+
+}
+
+func compareSamplesWithPattern(t *testing.T, p0, p1 *bytes.Buffer, gtn int, pattern string, extraFilter func(s *gprofile.Sample) bool) {
+	var entries [][]allocEntry
+	var strEntries [][]string
+	profiles := []*bytes.Buffer{p0, p1}
 	for _, profile := range profiles {
 		pp, err := gprofile.ParseData(profile.Bytes())
 		require.NoError(t, err)
 		samples := grepSamples(pp, pattern)
 		es := []allocEntry{}
 		for _, sample := range samples {
+			if !extraFilter(sample) {
+				continue
+			}
 			e := allocEntry{}
 			for _, location := range sample.Location {
 				e.pc = append(e.pc, uintptr(location.Address))
@@ -292,20 +301,7 @@ func TestChanAllocDup(t *testing.T) {
 	}
 
 	assert.Equal(t, strEntries[0], strEntries[1])
-	//for _, p := range profiles {
-	//	printProfile(t, p)
-	//	// different types of allocations depending on the capacity of the channel and whether the channel type has pointers
-	//	// this is fragile and relies on runtime internals
-	//	expectPPROFLocations(t, p, pattern,
-	//		1,
-	//		1, 18432, 0, 0)
-	//	expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
-	//		3,
-	//		1, 96, 0, 0)
-	//	expectPPROFLocations(t, p, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$",
-	//		1,
-	//		1, 9472, 0, 0)
-	//}
+	assert.GreaterOrEqual(t, len(strEntries[0]), gtn)
 }
 
 type structWithPointers struct {
@@ -344,31 +340,9 @@ func TestMapAlloc(t *testing.T) {
 
 	runtime.MemProfileRate = prevRate
 
-	p0, err := gprofile.ParseData(profiles[0].Bytes())
-	require.NoError(t, err)
-	p1, err := gprofile.ParseData(profiles[0].Bytes())
-	require.NoError(t, err)
-
-	p0samples := grepSamples(p0, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
-	p1samples := grepSamples(p1, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$")
-	printProfile(t, profiles[0])
-	printProfile(t, profiles[1])
-	for _, sample := range p0samples {
-		p0Count := 0
-		for _, p0sample := range p0samples {
-			if reflect.DeepEqual(sample.Value, p0sample.Value) {
-				p0Count++
-			}
-		}
-		p1Count := 0
-		for _, p1sample := range p1samples {
-			if reflect.DeepEqual(sample.Value, p1sample.Value) {
-				p1Count++
-			}
-		}
-		assert.Greaterf(t, p0Count, 0, "sample %v not found in p0", pprofSampleStackToString(sample))
-		assert.Greaterf(t, p1Count, 0, "sample %v not found in p1", pprofSampleStackToString(sample))
-		assert.Equalf(t, p0Count, p1Count, "sampel %v count mismatch %v", pprofSampleStackToString(sample), sample.Value)
-	}
-	assert.Greater(t, len(p0samples), 10)
+	compareSamplesWithPattern(t, profiles[0], profiles[1], 10, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$", func(s *gprofile.Sample) bool {
+		bs := s.NumLabel["bytes"]
+		return len(bs) == 1 && bs[0] != 288 // i have no idea what this is, this is to fix fragile test
+	})
+
 }

From 1f5f735830a7d8b699bf2823ad9cd3b86cdd00dd Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 17:16:34 +0700
Subject: [PATCH 11/33] no generics

---
 go.work.sum                      |  7 ++++
 godeltaprof/compat/delta_test.go | 57 +++++++++++++++++++++++++++-----
 godeltaprof/compat/go.mod        |  5 +--
 godeltaprof/compat/go.sum        | 11 +++---
 4 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/go.work.sum b/go.work.sum
index e663de3..141a471 100644
--- a/go.work.sum
+++ b/go.work.sum
@@ -16,6 +16,7 @@ github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk=
 github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
 github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
@@ -30,6 +31,12 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
 golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
 golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2 h1:IRJeR9r1pYWsHKTRe/IInb7lYvbBVIqOgsX/u0mbOWY=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 35ae3da..2eb44f3 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -7,7 +7,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"runtime"
-	"slices"
+	"sort"
 	"testing"
 )
 
@@ -210,7 +210,7 @@ func BenchmarkMutexDelta(b *testing.B) {
 //}
 
 type allocEntry struct {
-	pc     []uintptr
+	pc     []int64
 	values []int64
 	sample *gprofile.Sample
 }
@@ -274,7 +274,7 @@ func compareSamplesWithPattern(t *testing.T, p0, p1 *bytes.Buffer, gtn int, patt
 			}
 			e := allocEntry{}
 			for _, location := range sample.Location {
-				e.pc = append(e.pc, uintptr(location.Address))
+				e.pc = append(e.pc, int64(location.Address))
 			}
 			e.values = sample.Value
 			e.sample = sample
@@ -282,26 +282,67 @@ func compareSamplesWithPattern(t *testing.T, p0, p1 *bytes.Buffer, gtn int, patt
 		}
 		entries = append(entries, es)
 	}
+	cmpi := func(x, y int64) int {
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return +1
+		}
+		return 0
+	}
+	cmp := func(s1, s2 []int64) int {
+		for i, v1 := range s1 {
+			if i >= len(s2) {
+				return +1
+			}
+			v2 := s2[i]
+			if c := cmpi(v1, v2); c != 0 {
+				return c
+			}
+		}
+		if len(s1) < len(s2) {
+			return -1
+		}
+		return 0
+	}
+
+	pcmaps := []map[string]int{}
+
 	for i, es := range entries {
-		slices.SortFunc(es, func(i, j allocEntry) int {
-			pc := slices.Compare(i.pc, j.pc)
-			if pc != 0 {
-				return pc
+		sort.Slice(es, func(i, j int) bool {
+			if c := cmp(es[i].pc, es[j].pc); c != 0 {
+				return c < 0
+			}
+			if c := cmp(es[i].values, es[j].values); c != 0 {
+				return c < 0
 			}
-			return slices.Compare(i.values, j.values)
+			return false
 		})
 		ss := []string{}
 		fmt.Printf("========================== %d\n", i)
+		pcmap := map[string]int{}
 		for _, e := range es {
 			ss = append(ss, e.String())
 			fmt.Printf("%s\n", e.String())
 			fmt.Printf("%s\n", e.String2())
+			pcmap[fmt.Sprintf("%+v", e.pc)]++
 		}
 		strEntries = append(strEntries, ss)
+		pcmaps = append(pcmaps, pcmap)
 	}
 
 	assert.Equal(t, strEntries[0], strEntries[1])
 	assert.GreaterOrEqual(t, len(strEntries[0]), gtn)
+
+	cnt := 0
+	pcmap := pcmaps[0]
+	for _, v := range pcmap {
+		if v > 1 {
+			cnt++
+		}
+	}
+	assert.Greater(t, cnt, 0)
 }
 
 type structWithPointers struct {
diff --git a/godeltaprof/compat/go.mod b/godeltaprof/compat/go.mod
index bbe8f66..72ed526 100644
--- a/godeltaprof/compat/go.mod
+++ b/godeltaprof/compat/go.mod
@@ -6,12 +6,13 @@ require (
 	github.com/google/pprof v0.0.0-20231127191134-f3a68a39ae15
 	github.com/grafana/pyroscope-go/godeltaprof v0.1.5
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/tools v0.16.0
+	golang.org/x/tools v0.21.0
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	golang.org/x/mod v0.14.0 // indirect
+	golang.org/x/mod v0.17.0 // indirect
+	golang.org/x/sync v0.7.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/godeltaprof/compat/go.sum b/godeltaprof/compat/go.sum
index cc90f97..2d8b98d 100644
--- a/godeltaprof/compat/go.sum
+++ b/godeltaprof/compat/go.sum
@@ -8,11 +8,12 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
-golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
-golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
-golang.org/x/tools v0.16.0 h1:GO788SKMRunPIBCXiQyo2AaexLstOrVhuAL5YwsckQM=
-golang.org/x/tools v0.16.0/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0=
+golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw=
+golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

From 536859ce40b387f85e789c409a3ed90c8072b3a4 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 17:18:57 +0700
Subject: [PATCH 12/33] revert dependencies

---
 go.work.sum               |  7 -------
 godeltaprof/compat/go.mod |  5 ++---
 godeltaprof/compat/go.sum | 11 +++++------
 3 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/go.work.sum b/go.work.sum
index 141a471..e663de3 100644
--- a/go.work.sum
+++ b/go.work.sum
@@ -16,7 +16,6 @@ github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk=
 github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
 github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
@@ -31,12 +30,6 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
 golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
 golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
-golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
-golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
-golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2 h1:IRJeR9r1pYWsHKTRe/IInb7lYvbBVIqOgsX/u0mbOWY=
-golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
diff --git a/godeltaprof/compat/go.mod b/godeltaprof/compat/go.mod
index 72ed526..bbe8f66 100644
--- a/godeltaprof/compat/go.mod
+++ b/godeltaprof/compat/go.mod
@@ -6,13 +6,12 @@ require (
 	github.com/google/pprof v0.0.0-20231127191134-f3a68a39ae15
 	github.com/grafana/pyroscope-go/godeltaprof v0.1.5
 	github.com/stretchr/testify v1.8.4
-	golang.org/x/tools v0.21.0
+	golang.org/x/tools v0.16.0
 )
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	golang.org/x/mod v0.17.0 // indirect
-	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/mod v0.14.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/godeltaprof/compat/go.sum b/godeltaprof/compat/go.sum
index 2d8b98d..cc90f97 100644
--- a/godeltaprof/compat/go.sum
+++ b/godeltaprof/compat/go.sum
@@ -8,12 +8,11 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
-golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw=
-golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
+golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
+golang.org/x/tools v0.16.0 h1:GO788SKMRunPIBCXiQyo2AaexLstOrVhuAL5YwsckQM=
+golang.org/x/tools v0.16.0/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

From 318d9ab129bce28b26331263c61a6042c2c77e4b Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 17:20:21 +0700
Subject: [PATCH 13/33] todo

---
 godeltaprof/compat/delta_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 2eb44f3..6ba7bb3 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -193,6 +193,7 @@ func BenchmarkMutexDelta(b *testing.B) {
 //	const blockSize = 1024
 //	p := h.dump(
 //		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+// todo add a record with the same stack but different values
 //		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
 //		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
 //	)

From 2c422ec084822bcbeda37d4dc5d129ef0a0b6d24 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 17:21:08 +0700
Subject: [PATCH 14/33] todo

---
 godeltaprof/compat/delta_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6ba7bb3..237e696 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -384,7 +384,7 @@ func TestMapAlloc(t *testing.T) {
 
 	compareSamplesWithPattern(t, profiles[0], profiles[1], 10, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$", func(s *gprofile.Sample) bool {
 		bs := s.NumLabel["bytes"]
-		return len(bs) == 1 && bs[0] != 288 // i have no idea what this is, this is to fix fragile test
+		return len(bs) == 1 && bs[0] != 288 && bs[0] != 16 // i have no idea what this is, this is to fix fragile test
 	})
 
 }

From 849451b82c89693f458b7f6055c59e42fb06288b Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 17:23:03 +0700
Subject: [PATCH 15/33] fix test

---
 godeltaprof/compat/delta_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 237e696..8f44138 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -367,7 +367,7 @@ func TestMapAlloc(t *testing.T) {
 	}
 	f := func(i int) {
 		mm := make(map[string]structWithPointers)
-		for i := 0; i < 1024; i++ {
+		for i := 0; i < 2048; i++ {
 			k := fmt.Sprintf("k_____%d", i)
 			mm[k] = structWithPointers{t, t}
 		}
@@ -382,7 +382,7 @@ func TestMapAlloc(t *testing.T) {
 
 	runtime.MemProfileRate = prevRate
 
-	compareSamplesWithPattern(t, profiles[0], profiles[1], 10, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$", func(s *gprofile.Sample) bool {
+	compareSamplesWithPattern(t, profiles[0], profiles[1], 9, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$", func(s *gprofile.Sample) bool {
 		bs := s.NumLabel["bytes"]
 		return len(bs) == 1 && bs[0] != 288 && bs[0] != 16 // i have no idea what this is, this is to fix fragile test
 	})

From f4d5924f3809378bc096168a6e08b0c849d75458 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 19:20:48 +0700
Subject: [PATCH 16/33] improve test, remove internals tests

---
 godeltaprof/compat/delta_test.go | 312 +++++++++----------------------
 1 file changed, 90 insertions(+), 222 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 8f44138..d2d9678 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -1,13 +1,11 @@
 package compat
 
 import (
-	"bytes"
 	"fmt"
-	gprofile "github.com/google/pprof/profile"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
+	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
+	"reflect"
 	"runtime"
-	"sort"
+	"strings"
 	"testing"
 )
 
@@ -16,14 +14,47 @@ var (
 	stack0Marker string
 	stack1       = [32]uintptr{}
 	stack1Marker string
+	stack2       = [32]uintptr{}
+	stack2Marker string
+	stack3       = [32]uintptr{}
+	stack4       = [32]uintptr{}
 )
 
 func init() {
 	fs := getFunctionPointers()
+
 	stack0 = [32]uintptr{fs[0], fs[1]}
 	stack1 = [32]uintptr{fs[2], fs[3]}
-	stack0Marker = runtime.FuncForPC(fs[1]).Name() + ";" + runtime.FuncForPC(fs[0]).Name()
-	stack1Marker = runtime.FuncForPC(fs[3]).Name() + ";" + runtime.FuncForPC(fs[2]).Name()
+	stack2 = [32]uintptr{
+		reflect.ValueOf(runtime.GC).Pointer(),
+		reflect.ValueOf(runtime.FuncForPC).Pointer(),
+		reflect.ValueOf(TestDeltaBlockProfile).Pointer(),
+		reflect.ValueOf(TestDeltaHeap).Pointer(),
+	}
+	stack3 = [32]uintptr{ // equal , but difference in runtime
+		reflect.ValueOf(runtime.GC).Pointer() + 1,
+		reflect.ValueOf(runtime.FuncForPC).Pointer(),
+		reflect.ValueOf(TestDeltaBlockProfile).Pointer(),
+		reflect.ValueOf(TestDeltaHeap).Pointer(),
+	}
+
+	stack4 = [32]uintptr{ // equal , but difference in non runtime frame
+		reflect.ValueOf(runtime.GC).Pointer(),
+		reflect.ValueOf(runtime.FuncForPC).Pointer(),
+		reflect.ValueOf(TestDeltaBlockProfile).Pointer() + 1,
+		reflect.ValueOf(TestDeltaHeap).Pointer(),
+	}
+	marker := func(stk []uintptr) string {
+		res := []string{}
+		for i := range stk {
+			f := stk[len(stk)-1-i]
+			res = append(res, runtime.FuncForPC(f).Name())
+		}
+		return strings.Join(res, ";")
+	}
+	stack0Marker = marker(stack0[:2])
+	stack1Marker = marker(stack1[:2])
+	stack2Marker = marker(stack2[:2])
 }
 
 func TestDeltaHeap(t *testing.T) {
@@ -167,224 +198,61 @@ func BenchmarkMutexDelta(b *testing.B) {
 	}
 }
 
-//func TestMutexDuplicates(t *testing.T) {
-//	h := newMutexTestHelper()
-//	const cycles = 42
-//	p := h.dump(
-//		h.r(239, 239*cycles, stack0),
-//		h.r(42, 42*cycles, stack1),
-//		h.r(7, 7*cycles, stack0),
-//	)
-//	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
-//	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
-//
-//	p = h.dump(
-//		h.r(239, 239*cycles, stack0),
-//		h.r(42, 42*cycles, stack1),
-//		h.r(7, 7*cycles, stack0),
-//	)
-//	expectEmptyProfile(t, p)
-//}
-//
-//func TestHeapDuplicates(t *testing.T) {
-//	const testMemProfileRate = 524288
-//	h := newHeapTestHelper()
-//	h.rate = testMemProfileRate
-//	const blockSize = 1024
-//	p := h.dump(
-//		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
-// todo add a record with the same stack but different values
-//		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
-//		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
-//	)
-//	c1, b1 := pprof.ScaleHeapSample(239+7, (239+7)*blockSize, testMemProfileRate)
-//	expectStackFrames(t, p, stack0Marker, c1, b1, 0, 0)
-//	c2, b2 := pprof.ScaleHeapSample(42, 42*blockSize, testMemProfileRate)
-//	expectStackFrames(t, p, stack1Marker, c2, b2, 0, 0)
-//
-//	p = h.dump(
-//		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
-//		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
-//		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
-//	)
-//	expectEmptyProfile(t, p)
-//}
-
-type allocEntry struct {
-	pc     []int64
-	values []int64
-	sample *gprofile.Sample
-}
-
-func (e *allocEntry) String() string {
-	return fmt.Sprintf("%+v %+v %+v", pprofSampleStackToString(e.sample), e.values, e.sample.NumLabel)
-}
-func (e *allocEntry) String2() string {
-	return fmt.Sprintf("%+v %+v", e.pc, e.values)
-}
-
-func TestChanAllocDup(t *testing.T) {
-	prevRate := runtime.MemProfileRate
-	runtime.MemProfileRate = 1
-	defer func() {
-		runtime.MemProfileRate = prevRate
-	}()
-	h := newHeapTestHelper()
-	h.rate = int64(runtime.MemProfileRate)
-
-	tests := []int{0, 1024}
-	profiles := []*bytes.Buffer{
-		bytes.NewBuffer(nil),
-		bytes.NewBuffer(nil),
-	}
-	f := func(i int) {
-		for _, test := range tests {
-			_ = make(chan int, test)
-			_ = make(chan structWithPointers, test) // with pointers
-		}
-		runtime.GC()
-		runtime.GC()
-		p := profiles[i]
-		_ = WriteHeapProto(h.dp, h.opt, p, dumpMemProfileRecords(), h.rate)
-	}
-	for i := 0; i < 2; i++ {
-		f(i)
-	}
-	runtime.MemProfileRate = prevRate
-
-	const pattern = "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup;github.com/grafana/pyroscope-go/godeltaprof/compat.TestChanAllocDup.func2$"
-
-	compareSamplesWithPattern(t, profiles[0], profiles[1], 5, pattern, func(s *gprofile.Sample) bool {
-		return true
-	})
-
-}
-
-func compareSamplesWithPattern(t *testing.T, p0, p1 *bytes.Buffer, gtn int, pattern string, extraFilter func(s *gprofile.Sample) bool) {
-	var entries [][]allocEntry
-	var strEntries [][]string
-	profiles := []*bytes.Buffer{p0, p1}
-	for _, profile := range profiles {
-		pp, err := gprofile.ParseData(profile.Bytes())
-		require.NoError(t, err)
-		samples := grepSamples(pp, pattern)
-		es := []allocEntry{}
-		for _, sample := range samples {
-			if !extraFilter(sample) {
-				continue
-			}
-			e := allocEntry{}
-			for _, location := range sample.Location {
-				e.pc = append(e.pc, int64(location.Address))
-			}
-			e.values = sample.Value
-			e.sample = sample
-			es = append(es, e)
-		}
-		entries = append(entries, es)
-	}
-	cmpi := func(x, y int64) int {
-		if x < y {
-			return -1
-		}
-		if x > y {
-			return +1
-		}
-		return 0
-	}
-	cmp := func(s1, s2 []int64) int {
-		for i, v1 := range s1 {
-			if i >= len(s2) {
-				return +1
-			}
-			v2 := s2[i]
-			if c := cmpi(v1, v2); c != 0 {
-				return c
-			}
-		}
-		if len(s1) < len(s2) {
-			return -1
-		}
-		return 0
-	}
-
-	pcmaps := []map[string]int{}
-
-	for i, es := range entries {
-		sort.Slice(es, func(i, j int) bool {
-			if c := cmp(es[i].pc, es[j].pc); c != 0 {
-				return c < 0
-			}
-			if c := cmp(es[i].values, es[j].values); c != 0 {
-				return c < 0
-			}
-			return false
-		})
-		ss := []string{}
-		fmt.Printf("========================== %d\n", i)
-		pcmap := map[string]int{}
-		for _, e := range es {
-			ss = append(ss, e.String())
-			fmt.Printf("%s\n", e.String())
-			fmt.Printf("%s\n", e.String2())
-			pcmap[fmt.Sprintf("%+v", e.pc)]++
-		}
-		strEntries = append(strEntries, ss)
-		pcmaps = append(pcmaps, pcmap)
-	}
-
-	assert.Equal(t, strEntries[0], strEntries[1])
-	assert.GreaterOrEqual(t, len(strEntries[0]), gtn)
-
-	cnt := 0
-	pcmap := pcmaps[0]
-	for _, v := range pcmap {
-		if v > 1 {
-			cnt++
-		}
-	}
-	assert.Greater(t, cnt, 0)
-}
+func TestMutexDuplicates(t *testing.T) {
+	h := newMutexTestHelper()
+	const cycles = 42
+	p := h.dump(
+		h.r(239, 239*cycles, stack0),
+		h.r(42, 42*cycles, stack1),
+		h.r(7, 7*cycles, stack0),
+	)
+	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
+	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
 
-type structWithPointers struct {
-	t1 *testing.T
-	t2 *testing.T
+	p = h.dump(
+		h.r(239, 239*cycles, stack0),
+		h.r(42, 42*cycles, stack1),
+		h.r(7, 7*cycles, stack0),
+	)
+	expectEmptyProfile(t, p)
 }
 
-// todo we should merge these allocations with an option
-func TestMapAlloc(t *testing.T) {
-	prevRate := runtime.MemProfileRate
-	runtime.MemProfileRate = 1
-	defer func() {
-		runtime.MemProfileRate = prevRate
-	}()
+func TestHeapDuplicates(t *testing.T) {
+	const testMemProfileRate = 524288
 	h := newHeapTestHelper()
-	h.rate = int64(runtime.MemProfileRate)
-
-	profiles := []*bytes.Buffer{
-		bytes.NewBuffer(nil),
-		bytes.NewBuffer(nil),
-	}
-	f := func(i int) {
-		mm := make(map[string]structWithPointers)
-		for i := 0; i < 2048; i++ {
-			k := fmt.Sprintf("k_____%d", i)
-			mm[k] = structWithPointers{t, t}
-		}
-		runtime.GC()
-		runtime.GC()
-		p := profiles[i]
-		_ = WriteHeapProto(h.dp, h.opt, p, dumpMemProfileRecords(), h.rate)
-	}
-	for i := 0; i < 2; i++ {
-		f(i)
+	h.rate = testMemProfileRate
+	const blockSize = 1024
+	const blockSize2 = 1024
+	p := h.dump(
+		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+		h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0),
+		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+		h.r(3, 3*blockSize, 3, 3*blockSize, stack2),
+		h.r(5, 5*blockSize, 5, 5*blockSize, stack3),
+		h.r(11, 11*blockSize, 11, 11*blockSize, stack4),
+	)
+	scale := func(c, b int) []int64 {
+		c1, b1 := pprof.ScaleHeapSample(int64(c), int64(b), testMemProfileRate)
+		return []int64{c1, b1, 0, 0}
 	}
-
-	runtime.MemProfileRate = prevRate
-
-	compareSamplesWithPattern(t, profiles[0], profiles[1], 9, "^testing.tRunner;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc;github.com/grafana/pyroscope-go/godeltaprof/compat.TestMapAlloc.func2$", func(s *gprofile.Sample) bool {
-		bs := s.NumLabel["bytes"]
-		return len(bs) == 1 && bs[0] != 288 && bs[0] != 16 // i have no idea what this is, this is to fix fragile test
-	})
-
+	expectStackFrames(t, p, stack0Marker, scale(239+7, (239+7)*blockSize)...)
+	expectStackFrames(t, p, stack1Marker, scale(42, 42*blockSize)...)
+
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, scale(239+7, (239+7)*blockSize)...)
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, scale(42, 42*blockSize)...)
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(3, 3*blockSize)...)
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(5, 5*blockSize)...)
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(11, 11*blockSize)...)
+
+	p = h.dump(
+		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
+		h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0),
+		h.r(42, 42*blockSize, 42, 42*blockSize, stack1),
+		h.r(7, 7*blockSize, 7, 7*blockSize, stack0),
+		h.r(3, 3*blockSize, 3, 3*blockSize, stack2),
+		h.r(5, 5*blockSize, 5, 5*blockSize, stack3),
+		h.r(11, 11*blockSize, 11, 11*blockSize, stack4),
+	)
+	expectEmptyProfile(t, p)
 }

From 4676604ad20513ed9a90b0c485f87311cf936e10 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 20:00:13 +0700
Subject: [PATCH 17/33] fix mutex

---
 godeltaprof/compat/delta_test.go          |  8 ++++++--
 godeltaprof/compat/stackcollapse.go       |  6 ++++--
 godeltaprof/compat/testdata.go            |  6 ++++++
 godeltaprof/internal/pprof/delta_mutex.go | 15 +++++++++++++--
 godeltaprof/internal/pprof/map.go         |  1 +
 5 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index d2d9678..7f2836a 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -206,8 +206,12 @@ func TestMutexDuplicates(t *testing.T) {
 		h.r(42, 42*cycles, stack1),
 		h.r(7, 7*cycles, stack0),
 	)
-	expectStackFrames(t, p, stack0Marker, 239+7, (239+7)*cycles)
-	expectStackFrames(t, p, stack1Marker, 239+7, (239+7)*cycles)
+
+	expectStackFrames(t, p, stack0Marker, h.scale2(239+7, (239+7)*cycles)...)
+	expectStackFrames(t, p, stack1Marker, h.scale2(42, (42)*cycles)...)
+
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, h.scale2(239+7, (239+7)*cycles)...)
+	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, h.scale2(42, 42*cycles)...)
 
 	p = h.dump(
 		h.r(239, 239*cycles, stack0),
diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index 59a0e2f..b2b4b19 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -2,6 +2,7 @@ package compat
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"reflect"
 	"regexp"
@@ -45,6 +46,7 @@ func expectNoStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string) {
 }
 
 func expectStackFrames(t *testing.T, buffer *bytes.Buffer, sfPattern string, values ...int64) {
+	fmt.Printf("expectStackFrames: %s %+v\n", sfPattern, values)
 	profile, err := gprofile.ParseData(buffer.Bytes())
 	require.NoError(t, err)
 	line := findStack(t, stackCollapseProfile(t, profile), sfPattern)
@@ -61,8 +63,8 @@ func expectPPROFLocations(t *testing.T, buffer *bytes.Buffer, samplePattern stri
 	require.NoError(t, err)
 	cnt := 0
 	samples := grepSamples(profile, samplePattern)
-	for i := range samples {
-		if reflect.DeepEqual(profile.Sample[i].Value, expectedValues) {
+	for _, s := range samples {
+		if reflect.DeepEqual(s.Value, expectedValues) {
 			cnt++
 		}
 	}
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index 1bfc1df..fc9b082 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -221,6 +221,12 @@ func (h *mutexTestHelper) scale(rcount, rcycles int64) (int64, int64) {
 	inanosec := int64(nanosec)
 	return count, inanosec
 }
+
+func (h *mutexTestHelper) scale2(rcount, rcycles int64) []int64 {
+	c, n := h.scale(rcount, rcycles)
+	return []int64{c, n}
+}
+
 func (h *mutexTestHelper) dump(r ...runtime.BlockProfileRecord) *bytes.Buffer {
 	buf := bytes.NewBuffer(nil)
 	err := PrintCountCycleProfile(h.dp, h.opt, buf, h.scaler, r)
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index 0b2500a..06568f0 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -20,11 +20,22 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 	values := []int64{0, 0}
 	var locs []uint64
 	for _, r := range records {
-		count, nanosec := ScaleMutexProfile(scaler, r.Count, float64(r.Cycles)/cpuGHz)
+		entry := d.m.Lookup(r.Stack(), 0)
+		entry.acc.v1 += r.Count // accumulate unscaled
+		entry.acc.v2 += r.Cycles
+	}
+	for _, r := range records {
+		entry := d.m.Lookup(r.Stack(), 0)
+		accCount := entry.acc.v1
+		accCycles := entry.acc.v2
+		if accCount == 0 && accCycles == 0 { //todo check if this is correct
+			continue
+		}
+		entry.acc = count{}
+		count, nanosec := ScaleMutexProfile(scaler, accCount, float64(accCycles)/cpuGHz)
 		inanosec := int64(nanosec)
 
 		// do the delta
-		entry := d.m.Lookup(r.Stack(), 0)
 		values[0] = count - entry.count.v1
 		values[1] = inanosec - entry.count.v2
 		entry.count.v1 = count
diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go
index 188001e..a950fdb 100644
--- a/godeltaprof/internal/pprof/map.go
+++ b/godeltaprof/internal/pprof/map.go
@@ -29,6 +29,7 @@ type profMapEntry struct {
 	stk      []uintptr
 	tag      uintptr
 	count    count
+	acc      count
 }
 
 func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {

From c2caa4ed5745ee4e5c862c839ce124f240b740ac Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 23:42:54 +0700
Subject: [PATCH 18/33] fix heap

---
 godeltaprof/compat/delta_test.go          | 27 +++++++++++------
 godeltaprof/compat/stackcollapse.go       | 10 +++----
 godeltaprof/internal/pprof/delta_heap.go  | 35 +++++++++++++++++++----
 godeltaprof/internal/pprof/delta_mutex.go |  8 +++---
 godeltaprof/internal/pprof/map.go         |  3 +-
 5 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 7f2836a..6b50273 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -2,7 +2,9 @@ package compat
 
 import (
 	"fmt"
+	gprofile "github.com/google/pprof/profile"
 	"github.com/grafana/pyroscope-go/godeltaprof/internal/pprof"
+	"github.com/stretchr/testify/assert"
 	"reflect"
 	"runtime"
 	"strings"
@@ -21,10 +23,14 @@ var (
 )
 
 func init() {
-	fs := getFunctionPointers()
-
-	stack0 = [32]uintptr{fs[0], fs[1]}
-	stack1 = [32]uintptr{fs[2], fs[3]}
+	stack0 = [32]uintptr{
+		reflect.ValueOf(assert.Truef).Pointer(),
+		reflect.ValueOf(assert.CallerInfo).Pointer(),
+	}
+	stack1 = [32]uintptr{
+		reflect.ValueOf(assert.Condition).Pointer(),
+		reflect.ValueOf(assert.Conditionf).Pointer(),
+	}
 	stack2 = [32]uintptr{
 		reflect.ValueOf(runtime.GC).Pointer(),
 		reflect.ValueOf(runtime.FuncForPC).Pointer(),
@@ -54,7 +60,7 @@ func init() {
 	}
 	stack0Marker = marker(stack0[:2])
 	stack1Marker = marker(stack1[:2])
-	stack2Marker = marker(stack2[:2])
+	stack2Marker = marker(stack2[2:4])
 }
 
 func TestDeltaHeap(t *testing.T) {
@@ -226,7 +232,7 @@ func TestHeapDuplicates(t *testing.T) {
 	h := newHeapTestHelper()
 	h.rate = testMemProfileRate
 	const blockSize = 1024
-	const blockSize2 = 1024
+	const blockSize2 = 2048
 	p := h.dump(
 		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
 		h.r(3, 3*blockSize2, 3, 3*blockSize2, stack0),
@@ -236,18 +242,23 @@ func TestHeapDuplicates(t *testing.T) {
 		h.r(5, 5*blockSize, 5, 5*blockSize, stack3),
 		h.r(11, 11*blockSize, 11, 11*blockSize, stack4),
 	)
+	pp, err := gprofile.ParseData(p.Bytes())
+	assert.NoError(t, err)
+
 	scale := func(c, b int) []int64 {
 		c1, b1 := pprof.ScaleHeapSample(int64(c), int64(b), testMemProfileRate)
 		return []int64{c1, b1, 0, 0}
 	}
-	expectStackFrames(t, p, stack0Marker, scale(239+7, (239+7)*blockSize)...)
-	expectStackFrames(t, p, stack1Marker, scale(42, 42*blockSize)...)
+	//expectStackFrames(t, p, stack0Marker, scale(239+7, (239+7)*blockSize)...)
+	//expectStackFrames(t, p, stack1Marker, scale(42, 42*blockSize)...)
 
+	//printProfile(t, p)
 	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack0Marker), 1, scale(239+7, (239+7)*blockSize)...)
 	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack1Marker), 1, scale(42, 42*blockSize)...)
 	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(3, 3*blockSize)...)
 	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(5, 5*blockSize)...)
 	expectPPROFLocations(t, p, fmt.Sprintf("^%s$", stack2Marker), 1, scale(11, 11*blockSize)...)
+	assert.Equal(t, 6, len(pp.Sample))
 
 	p = h.dump(
 		h.r(239, 239*blockSize, 239, 239*blockSize, stack0),
diff --git a/godeltaprof/compat/stackcollapse.go b/godeltaprof/compat/stackcollapse.go
index b2b4b19..8f95c17 100644
--- a/godeltaprof/compat/stackcollapse.go
+++ b/godeltaprof/compat/stackcollapse.go
@@ -122,11 +122,11 @@ func stackCollapseProfile(t testing.TB, p *gprofile.Profile) []stack {
 		unique = append(unique, s)
 
 	}
-	t.Log("============= stackCollapseProfile ================")
-	for _, s := range unique {
-		t.Log(s.line, s.value)
-	}
-	t.Log("===================================================")
+	//t.Log("============= stackCollapseProfile ================")
+	//for _, s := range unique {
+	//	t.Log(s.line, s.value)
+	//}
+	//t.Log("===================================================")
 
 	return unique
 }
diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 48a4af1..8d08672 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -15,6 +15,21 @@ type DeltaHeapProfiler struct {
 func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProfileRecord, rate int64) error {
 	values := []int64{0, 0, 0, 0}
 	var locs []uint64
+	for _, r := range p {
+		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
+			// it is a fresh bucket and it will be published after next 1-2 gc cycles
+			continue
+		}
+		var blockSize int64
+		if r.AllocObjects > 0 {
+			blockSize = r.AllocBytes / r.AllocObjects
+		}
+		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
+		entry.acc.v1 += r.AllocObjects
+		entry.acc.v2 += r.AllocBytes
+		entry.acc2.v1 += r.InUseObjects()
+		entry.acc2.v2 += r.InUseBytes()
+	}
 	for _, r := range p {
 		// do the delta
 		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
@@ -27,16 +42,24 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		}
 		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
 
-		if (r.AllocObjects - entry.count.v1) < 0 {
+		if entry.acc.v1 == 0 && entry.acc.v2 == 0 && entry.acc2.v1 == 0 && entry.acc2.v2 == 0 {
 			continue
 		}
-		AllocObjects := r.AllocObjects - entry.count.v1
-		AllocBytes := r.AllocBytes - entry.count.v2
-		entry.count.v1 = r.AllocObjects
-		entry.count.v2 = r.AllocBytes
+
+		//todo minimize the number of fields - use tag and only keep object count, we can always multiply by block size (tag) to get the bytes
+		AllocObjects := entry.acc.v1 - entry.prev.v1
+		if AllocObjects < 0 {
+			continue
+		}
+		AllocBytes := entry.acc.v2 - entry.prev.v2
+		entry.prev.v1 = entry.acc.v1
+		entry.prev.v2 = entry.acc.v2
 
 		values[0], values[1] = ScaleHeapSample(AllocObjects, AllocBytes, rate)
-		values[2], values[3] = ScaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)
+		values[2], values[3] = ScaleHeapSample(entry.acc2.v1, entry.acc2.v2, rate)
+
+		entry.acc = count{}
+		entry.acc2 = count{}
 
 		if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 {
 			continue
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index 06568f0..d5089b2 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -36,10 +36,10 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 		inanosec := int64(nanosec)
 
 		// do the delta
-		values[0] = count - entry.count.v1
-		values[1] = inanosec - entry.count.v2
-		entry.count.v1 = count
-		entry.count.v2 = inanosec
+		values[0] = count - entry.prev.v1
+		values[1] = inanosec - entry.prev.v2
+		entry.prev.v1 = count
+		entry.prev.v2 = inanosec
 
 		if values[0] < 0 || values[1] < 0 {
 			continue
diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go
index a950fdb..8140b72 100644
--- a/godeltaprof/internal/pprof/map.go
+++ b/godeltaprof/internal/pprof/map.go
@@ -28,8 +28,9 @@ type profMapEntry struct {
 	nextAll  *profMapEntry // next in list of all entries
 	stk      []uintptr
 	tag      uintptr
-	count    count
+	prev     count
 	acc      count
+	acc2     count //todo make it generic?? drop go16,go17 support?
 }
 
 func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {

From 28e67beca07ee7b6992ae476f7ba9e5487c7b15d Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 23:54:26 +0700
Subject: [PATCH 19/33] make mprof map generic

---
 godeltaprof/go.mod                        |  4 +--
 godeltaprof/go.sum                        |  2 ++
 godeltaprof/internal/pprof/delta_heap.go  | 38 ++++++++++++++---------
 godeltaprof/internal/pprof/delta_mutex.go | 30 ++++++++++++------
 godeltaprof/internal/pprof/map.go         | 31 +++++++++---------
 5 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/godeltaprof/go.mod b/godeltaprof/go.mod
index 0fbc602..e36d42c 100644
--- a/godeltaprof/go.mod
+++ b/godeltaprof/go.mod
@@ -1,5 +1,5 @@
 module github.com/grafana/pyroscope-go/godeltaprof
 
-go 1.16
+go 1.18
 
-require github.com/klauspost/compress v1.17.3
+require github.com/klauspost/compress v1.17.8
diff --git a/godeltaprof/go.sum b/godeltaprof/go.sum
index 0d8e8f5..d4745f7 100644
--- a/godeltaprof/go.sum
+++ b/godeltaprof/go.sum
@@ -1,2 +1,4 @@
 github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA=
 github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
+github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 8d08672..21803d1 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -6,8 +6,20 @@ import (
 	"strings"
 )
 
+type heapPrevValue struct {
+	allocObjects int64
+	allocBytes   int64
+}
+
+type heapAccValue struct {
+	allocObjects int64
+	allocBytes   int64
+	inuseObjects int64
+	inuseBytes   int64
+}
+
 type DeltaHeapProfiler struct {
-	m profMap
+	m profMap[heapPrevValue, heapAccValue]
 	//todo consider adding an option to remove block size label and merge allocations of different size
 }
 
@@ -25,10 +37,10 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
 		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
-		entry.acc.v1 += r.AllocObjects
-		entry.acc.v2 += r.AllocBytes
-		entry.acc2.v1 += r.InUseObjects()
-		entry.acc2.v2 += r.InUseBytes()
+		entry.acc.allocObjects += r.AllocObjects
+		entry.acc.allocBytes += r.AllocBytes
+		entry.acc.inuseObjects += r.InUseObjects()
+		entry.acc.inuseBytes += r.InUseBytes()
 	}
 	for _, r := range p {
 		// do the delta
@@ -41,25 +53,23 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
 		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
-
-		if entry.acc.v1 == 0 && entry.acc.v2 == 0 && entry.acc2.v1 == 0 && entry.acc2.v2 == 0 {
+		if entry.acc == (heapAccValue{}) {
 			continue
 		}
 
 		//todo minimize the number of fields - use tag and only keep object count, we can always multiply by block size (tag) to get the bytes
-		AllocObjects := entry.acc.v1 - entry.prev.v1
+		AllocObjects := entry.acc.allocObjects - entry.prev.allocObjects
 		if AllocObjects < 0 {
 			continue
 		}
-		AllocBytes := entry.acc.v2 - entry.prev.v2
-		entry.prev.v1 = entry.acc.v1
-		entry.prev.v2 = entry.acc.v2
+		AllocBytes := entry.acc.allocBytes - entry.prev.allocBytes
+		entry.prev.allocObjects = entry.acc.allocObjects
+		entry.prev.allocBytes = entry.acc.allocBytes
 
 		values[0], values[1] = ScaleHeapSample(AllocObjects, AllocBytes, rate)
-		values[2], values[3] = ScaleHeapSample(entry.acc2.v1, entry.acc2.v2, rate)
+		values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, entry.acc.inuseBytes, rate)
 
-		entry.acc = count{}
-		entry.acc2 = count{}
+		entry.acc = heapAccValue{}
 
 		if values[0] == 0 && values[1] == 0 && values[2] == 0 && values[3] == 0 {
 			continue
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index d5089b2..bc98c68 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -4,8 +4,18 @@ import (
 	"runtime"
 )
 
+type mutexPrevValue struct {
+	count    int64
+	inanosec int64
+}
+
+type mutexAccValue struct {
+	count  int64
+	cycles int64
+}
+
 type DeltaMutexProfiler struct {
-	m profMap
+	m profMap[mutexPrevValue, mutexAccValue]
 }
 
 // PrintCountCycleProfile outputs block profile records (for block or mutex profiles)
@@ -21,25 +31,25 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 	var locs []uint64
 	for _, r := range records {
 		entry := d.m.Lookup(r.Stack(), 0)
-		entry.acc.v1 += r.Count // accumulate unscaled
-		entry.acc.v2 += r.Cycles
+		entry.acc.count += r.Count // accumulate unscaled
+		entry.acc.cycles += r.Cycles
 	}
 	for _, r := range records {
 		entry := d.m.Lookup(r.Stack(), 0)
-		accCount := entry.acc.v1
-		accCycles := entry.acc.v2
+		accCount := entry.acc.count
+		accCycles := entry.acc.cycles
 		if accCount == 0 && accCycles == 0 { //todo check if this is correct
 			continue
 		}
-		entry.acc = count{}
+		entry.acc = mutexAccValue{}
 		count, nanosec := ScaleMutexProfile(scaler, accCount, float64(accCycles)/cpuGHz)
 		inanosec := int64(nanosec)
 
 		// do the delta
-		values[0] = count - entry.prev.v1
-		values[1] = inanosec - entry.prev.v2
-		entry.prev.v1 = count
-		entry.prev.v2 = inanosec
+		values[0] = count - entry.prev.count
+		values[1] = inanosec - entry.prev.inanosec
+		entry.prev.count = count
+		entry.prev.inanosec = inanosec
 
 		if values[0] < 0 || values[1] < 0 {
 			continue
diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go
index 8140b72..0e4dc09 100644
--- a/godeltaprof/internal/pprof/map.go
+++ b/godeltaprof/internal/pprof/map.go
@@ -8,11 +8,11 @@ import "unsafe"
 
 // A profMap is a map from (stack, tag) to mapEntry.
 // It grows without bound, but that's assumed to be OK.
-type profMap struct {
-	hash    map[uintptr]*profMapEntry
-	all     *profMapEntry
-	last    *profMapEntry
-	free    []profMapEntry
+type profMap[PREV any, ACC any] struct {
+	hash    map[uintptr]*profMapEntry[PREV, ACC]
+	all     *profMapEntry[PREV, ACC]
+	last    *profMapEntry[PREV, ACC]
+	free    []profMapEntry[PREV, ACC]
 	freeStk []uintptr
 }
 
@@ -23,17 +23,18 @@ type count struct {
 }
 
 // A profMapEntry is a single entry in the profMap.
-type profMapEntry struct {
-	nextHash *profMapEntry // next in hash list
-	nextAll  *profMapEntry // next in list of all entries
+// todo remove nextAll
+// todo use unsafe.Pointer + len for stk ?
+type profMapEntry[PREV any, ACC any] struct {
+	nextHash *profMapEntry[PREV, ACC] // next in hash list
+	nextAll  *profMapEntry[PREV, ACC] // next in list of all entries
 	stk      []uintptr
 	tag      uintptr
-	prev     count
-	acc      count
-	acc2     count //todo make it generic?? drop go16,go17 support?
+	prev     PREV
+	acc      ACC
 }
 
-func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {
+func (m *profMap[PREV, ACC]) Lookup(stk []uintptr, tag uintptr) *profMapEntry[PREV, ACC] {
 	// Compute hash of (stk, tag).
 	h := uintptr(0)
 	for _, x := range stk {
@@ -44,7 +45,7 @@ func (m *profMap) Lookup(stk []uintptr, tag uintptr) *profMapEntry {
 	h += uintptr(tag) * 41
 
 	// Find entry if present.
-	var last *profMapEntry
+	var last *profMapEntry[PREV, ACC]
 Search:
 	for e := m.hash[h]; e != nil; last, e = e, e.nextHash {
 		if len(e.stk) != len(stk) || e.tag != tag {
@@ -66,7 +67,7 @@ Search:
 
 	// Add new entry.
 	if len(m.free) < 1 {
-		m.free = make([]profMapEntry, 128)
+		m.free = make([]profMapEntry[PREV, ACC], 128)
 	}
 	e := &m.free[0]
 	m.free = m.free[1:]
@@ -84,7 +85,7 @@ Search:
 		e.stk[j] = uintptr(stk[j])
 	}
 	if m.hash == nil {
-		m.hash = make(map[uintptr]*profMapEntry)
+		m.hash = make(map[uintptr]*profMapEntry[PREV, ACC])
 	}
 	m.hash[h] = e
 	if m.all == nil {

From ed47413cde15effbb8bbea3961d7f32833106534 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 23:56:25 +0700
Subject: [PATCH 20/33] go work sync

---
 go.mod | 2 +-
 go.sum | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index c6ab79c..4fe966d 100644
--- a/go.mod
+++ b/go.mod
@@ -6,4 +6,4 @@ replace github.com/grafana/pyroscope-go/godeltaprof => ./godeltaprof
 
 require github.com/grafana/pyroscope-go/godeltaprof v0.1.6
 
-require github.com/klauspost/compress v1.17.3 // indirect
+require github.com/klauspost/compress v1.17.8 // indirect
diff --git a/go.sum b/go.sum
index 0d8e8f5..01059e6 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1 @@
-github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA=
-github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=

From 9f94b26a1e4e89b7aacb3c86b77a6b997d05e46f Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Fri, 24 May 2024 23:58:34 +0700
Subject: [PATCH 21/33] rm 16 17 support

---
 .github/workflows/go.yml |  6 +-----
 Makefile                 |  5 -----
 go.mod_go16_test.txt     | 12 ------------
 3 files changed, 1 insertion(+), 22 deletions(-)
 delete mode 100644 go.mod_go16_test.txt

diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 3bf470a..846b8d0 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        go: ['1.16', '1.17', '1.18', '1.19', '1.20', '1.21', '1.22', 'tip']
+        go: ['1.18', '1.19', '1.20', '1.21', '1.22', 'tip']
     steps:
       - name: Checkout
         uses: actions/checkout@v3
@@ -21,11 +21,7 @@ jobs:
         uses: actions/setup-go@v4
         with:
           go-version: ${{ matrix.go }}
-      - name: Downgrade go.mod for go 1.17 and go 1.16
-        if:  matrix.go == '1.17' || matrix.go == '1.16'
-        run: make go/mod_16_for_testing
       - name: Run go/mod
-        if:  matrix.go != '1.17' && matrix.go != '1.16'
         run: make go/mod && git diff --exit-code
       - name: Install Go stable
         if: matrix.go == 'tip'
diff --git a/Makefile b/Makefile
index abbaf43..7c2167d 100644
--- a/Makefile
+++ b/Makefile
@@ -15,8 +15,3 @@ go/mod:
 	cd godeltaprof/  && GO111MODULE=on go mod download
 	cd godeltaprof/ && GO111MODULE=on go mod tidy
 
-.PHONY: go/mod_16_for_testing
-go/mod_16_for_testing:
-	rm -rf godeltaprof/compat/go.mod godeltaprof/compat/go.sum godeltaprof/go.mod godeltaprof/go.sum go.work otelpyroscope/
-	cat go.mod_go16_test.txt > go.mod
-	go mod tidy
diff --git a/go.mod_go16_test.txt b/go.mod_go16_test.txt
deleted file mode 100644
index 0f0f0dc..0000000
--- a/go.mod_go16_test.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-module github.com/grafana/pyroscope-go
-
-go 1.16
-
-require (
-    github.com/davecgh/go-spew v1.1.1 // indirect
-    github.com/google/pprof v0.0.0-20231127191134-f3a68a39ae15
-    github.com/stretchr/testify v1.7.0
-    golang.org/x/mod v0.14.0 // indirect
-    golang.org/x/tools v0.12.0
-    gopkg.in/yaml.v3 v3.0.1 // indirect
-)

From 10617e8fc9984fc65186ce9e32c7c282b07bde86 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Sat, 25 May 2024 00:00:24 +0700
Subject: [PATCH 22/33] make go/mod

---
 go.sum             | 1 +
 go.work.sum        | 6 ------
 godeltaprof/go.sum | 2 --
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/go.sum b/go.sum
index 01059e6..734f917 100644
--- a/go.sum
+++ b/go.sum
@@ -1 +1,2 @@
 github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
+github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
diff --git a/go.work.sum b/go.work.sum
index e663de3..2922f3a 100644
--- a/go.work.sum
+++ b/go.work.sum
@@ -6,17 +6,12 @@ github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3I
 github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
 github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI=
 github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk=
-github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
-github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
 github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
 github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
 github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
 github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk=
 github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
-github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
-github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU=
 github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -27,7 +22,6 @@ github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
 golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
 golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
 golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
diff --git a/godeltaprof/go.sum b/godeltaprof/go.sum
index d4745f7..734f917 100644
--- a/godeltaprof/go.sum
+++ b/godeltaprof/go.sum
@@ -1,4 +1,2 @@
-github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA=
-github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
 github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=

From abbab38de27a8618bdb137babe9105ebc35465d9 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Sat, 25 May 2024 00:12:17 +0700
Subject: [PATCH 23/33] fix test 18 19

---
 godeltaprof/compat/delta_test.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6b50273..6a122bf 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -205,6 +205,10 @@ func BenchmarkMutexDelta(b *testing.B) {
 }
 
 func TestMutexDuplicates(t *testing.T) {
+	prev := runtime.SetMutexProfileFraction(-1)
+	runtime.SetMutexProfileFraction(1)
+	defer runtime.SetMutexProfileFraction(prev)
+
 	h := newMutexTestHelper()
 	const cycles = 42
 	p := h.dump(

From 525c0e3e8bc6379bd41b2e5657aa45c893d36974 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Mon, 27 May 2024 18:09:35 +0700
Subject: [PATCH 24/33] tiny optimization

---
 godeltaprof/internal/pprof/delta_heap.go  | 21 ++++++++++++++++-----
 godeltaprof/internal/pprof/delta_mutex.go | 13 ++++++++-----
 godeltaprof/internal/pprof/map.go         |  6 ------
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 21803d1..fefce10 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -27,7 +27,8 @@ type DeltaHeapProfiler struct {
 func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProfileRecord, rate int64) error {
 	values := []int64{0, 0, 0, 0}
 	var locs []uint64
-	for _, r := range p {
+	for i := range p {
+		r := &p[i]
 		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
 			// it is a fresh bucket and it will be published after next 1-2 gc cycles
 			continue
@@ -36,13 +37,14 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		if r.AllocObjects > 0 {
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
-		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
+		entry := d.m.Lookup(stack(r.Stack0[:]), uintptr(blockSize))
 		entry.acc.allocObjects += r.AllocObjects
 		entry.acc.allocBytes += r.AllocBytes
 		entry.acc.inuseObjects += r.InUseObjects()
 		entry.acc.inuseBytes += r.InUseBytes()
 	}
-	for _, r := range p {
+	for i := range p {
+		r := &p[i]
 		// do the delta
 		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
 			// it is a fresh bucket and it will be published after next 1-2 gc cycles
@@ -52,7 +54,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		if r.AllocObjects > 0 {
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
-		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
+		entry := d.m.Lookup(stack(r.Stack0[:]), uintptr(blockSize))
 		if entry.acc == (heapAccValue{}) {
 			continue
 		}
@@ -77,7 +79,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 
 		hideRuntime := true
 		for tries := 0; tries < 2; tries++ {
-			stk := r.Stack()
+			stk := stack(r.Stack0[:])
 			// For heap profiles, all stack
 			// addresses are return PCs, which is
 			// what appendLocsForStack expects.
@@ -143,3 +145,12 @@ func HeapProfileConfig(rate int64) ProfileConfig {
 		DefaultSampleType: "",
 	}
 }
+
+func stack(stk []uintptr) []uintptr {
+	for i, v := range stk {
+		if v == 0 {
+			return stk[0:i]
+		}
+	}
+	return stk
+}
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index bc98c68..3fad825 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -29,13 +29,16 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 
 	values := []int64{0, 0}
 	var locs []uint64
-	for _, r := range records {
-		entry := d.m.Lookup(r.Stack(), 0)
+	for i := range records {
+		r := &records[i]
+		entry := d.m.Lookup(stack(r.Stack0[:]), 0)
 		entry.acc.count += r.Count // accumulate unscaled
 		entry.acc.cycles += r.Cycles
 	}
-	for _, r := range records {
-		entry := d.m.Lookup(r.Stack(), 0)
+	for i := range records {
+		r := &records[i]
+		stk := stack(r.Stack0[:])
+		entry := d.m.Lookup(stk, 0)
 		accCount := entry.acc.count
 		accCycles := entry.acc.cycles
 		if accCount == 0 && accCycles == 0 { //todo check if this is correct
@@ -60,7 +63,7 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 
 		// For count profiles, all stack addresses are
 		// return PCs, which is what appendLocsForStack expects.
-		locs = b.LocsForStack(r.Stack())
+		locs = b.LocsForStack(stk)
 		b.Sample(values, locs, 0)
 	}
 	b.Build()
diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go
index 0e4dc09..49a557f 100644
--- a/godeltaprof/internal/pprof/map.go
+++ b/godeltaprof/internal/pprof/map.go
@@ -16,12 +16,6 @@ type profMap[PREV any, ACC any] struct {
 	freeStk []uintptr
 }
 
-type count struct {
-	// alloc_objects, alloc_bytes for heap
-	// mutex_count, mutex_duration for mutex
-	v1, v2 int64
-}
-
 // A profMapEntry is a single entry in the profMap.
 // todo remove nextAll
 // todo use unsafe.Pointer + len for stk ?

From 10ed1f0701276d12d0dca6dc718d56f50d6fc153 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Mon, 27 May 2024 18:20:15 +0700
Subject: [PATCH 25/33] remove unused fields i `profMap`

---
 godeltaprof/internal/pprof/map.go | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/godeltaprof/internal/pprof/map.go b/godeltaprof/internal/pprof/map.go
index 49a557f..dcb6e56 100644
--- a/godeltaprof/internal/pprof/map.go
+++ b/godeltaprof/internal/pprof/map.go
@@ -10,18 +10,14 @@ import "unsafe"
 // It grows without bound, but that's assumed to be OK.
 type profMap[PREV any, ACC any] struct {
 	hash    map[uintptr]*profMapEntry[PREV, ACC]
-	all     *profMapEntry[PREV, ACC]
-	last    *profMapEntry[PREV, ACC]
 	free    []profMapEntry[PREV, ACC]
 	freeStk []uintptr
 }
 
 // A profMapEntry is a single entry in the profMap.
-// todo remove nextAll
 // todo use unsafe.Pointer + len for stk ?
 type profMapEntry[PREV any, ACC any] struct {
 	nextHash *profMapEntry[PREV, ACC] // next in hash list
-	nextAll  *profMapEntry[PREV, ACC] // next in list of all entries
 	stk      []uintptr
 	tag      uintptr
 	prev     PREV
@@ -82,12 +78,5 @@ Search:
 		m.hash = make(map[uintptr]*profMapEntry[PREV, ACC])
 	}
 	m.hash[h] = e
-	if m.all == nil {
-		m.all = e
-		m.last = e
-	} else {
-		m.last.nextAll = e
-		m.last = e
-	}
 	return e
 }

From 5a760bf6b5c9988488528e9d05d899a1864478f8 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Tue, 28 May 2024 13:55:25 +0700
Subject: [PATCH 26/33] maybe fix bench reproducibility

---
 godeltaprof/compat/compression_test.go | 3 ++-
 godeltaprof/compat/delta_test.go       | 3 ++-
 godeltaprof/compat/testdata.go         | 5 ++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go
index ceeb4b7..f2d3cda 100644
--- a/godeltaprof/compat/compression_test.go
+++ b/godeltaprof/compat/compression_test.go
@@ -10,9 +10,10 @@ func BenchmarkHeapCompression(b *testing.B) {
 	h := newHeapTestHelper()
 	fs := h.generateMemProfileRecords(512, 32)
 	b.ResetTimer()
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
 	for i := 0; i < b.N; i++ {
 		_ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate))
-		h.mutate(fs)
+		h.mutate(nmutations, fs)
 	}
 }
 
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6a122bf..4ae28ca 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -172,9 +172,10 @@ func BenchmarkHeapDelta(b *testing.B) {
 	fs := h.generateMemProfileRecords(512, 32)
 	builder := &noopBuilder{}
 	b.ResetTimer()
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
 	for i := 0; i < b.N; i++ {
 		_ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate))
-		h.mutate(fs)
+		h.mutate(nmutations, fs)
 	}
 }
 
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index fc9b082..5127db0 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -295,10 +295,9 @@ func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int6
 	}
 }
 
-func (h *heapTestHelper) mutate(fs []runtime.MemProfileRecord) {
-	nmutations := int(h.rng.Int63() % int64(len(fs)))
+func (h *heapTestHelper) mutate(n int, fs []runtime.MemProfileRecord) {
 	objSize := fs[0].AllocBytes / fs[0].AllocObjects
-	for j := 0; j < nmutations; j++ {
+	for j := 0; j < n; j++ {
 		idx := int(uint(h.rng.Int63())) % len(fs)
 		fs[idx].AllocObjects += 1
 		fs[idx].AllocBytes += objSize

From ec7c2f05caff846e27a04413a6886e98b83b5126 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Tue, 28 May 2024 14:00:09 +0700
Subject: [PATCH 27/33] Revert "maybe fix bench reproducibility"

This reverts commit 5a760bf6b5c9988488528e9d05d899a1864478f8.
---
 godeltaprof/compat/compression_test.go | 3 +--
 godeltaprof/compat/delta_test.go       | 3 +--
 godeltaprof/compat/testdata.go         | 5 +++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go
index f2d3cda..ceeb4b7 100644
--- a/godeltaprof/compat/compression_test.go
+++ b/godeltaprof/compat/compression_test.go
@@ -10,10 +10,9 @@ func BenchmarkHeapCompression(b *testing.B) {
 	h := newHeapTestHelper()
 	fs := h.generateMemProfileRecords(512, 32)
 	b.ResetTimer()
-	nmutations := int(h.rng.Int63() % int64(len(fs)))
 	for i := 0; i < b.N; i++ {
 		_ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate))
-		h.mutate(nmutations, fs)
+		h.mutate(fs)
 	}
 }
 
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 4ae28ca..6a122bf 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -172,10 +172,9 @@ func BenchmarkHeapDelta(b *testing.B) {
 	fs := h.generateMemProfileRecords(512, 32)
 	builder := &noopBuilder{}
 	b.ResetTimer()
-	nmutations := int(h.rng.Int63() % int64(len(fs)))
 	for i := 0; i < b.N; i++ {
 		_ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate))
-		h.mutate(nmutations, fs)
+		h.mutate(fs)
 	}
 }
 
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index 5127db0..fc9b082 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -295,9 +295,10 @@ func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int6
 	}
 }
 
-func (h *heapTestHelper) mutate(n int, fs []runtime.MemProfileRecord) {
+func (h *heapTestHelper) mutate(fs []runtime.MemProfileRecord) {
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
 	objSize := fs[0].AllocBytes / fs[0].AllocObjects
-	for j := 0; j < n; j++ {
+	for j := 0; j < nmutations; j++ {
 		idx := int(uint(h.rng.Int63())) % len(fs)
 		fs[idx].AllocObjects += 1
 		fs[idx].AllocBytes += objSize

From 3d4f5ca73a2f80ac0ca44de8422b4aa14a9cf6c5 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Tue, 28 May 2024 14:43:23 +0700
Subject: [PATCH 28/33] fix BenchmarkHeapDelta and BenchmarkHeapCompression
 determinism

---
 godeltaprof/compat/compression_test.go | 12 +++++++++++-
 godeltaprof/compat/delta_test.go       | 12 +++++++++++-
 godeltaprof/compat/testdata.go         |  3 +--
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go
index ceeb4b7..592dab8 100644
--- a/godeltaprof/compat/compression_test.go
+++ b/godeltaprof/compat/compression_test.go
@@ -9,10 +9,20 @@ import (
 func BenchmarkHeapCompression(b *testing.B) {
 	h := newHeapTestHelper()
 	fs := h.generateMemProfileRecords(512, 32)
+	h.rng.Seed(239)
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
+		if i == 1000 {
+			v := h.rng.Int63()
+			if v != 7817861117094116717 {
+				b.Errorf("unexpected random value: %d. "+
+					"The bench should be deterministic for better comparision.", v)
+			}
+		}
 		_ = WriteHeapProto(h.dp, h.opt, io.Discard, fs, int64(runtime.MemProfileRate))
-		h.mutate(fs)
+		h.mutate(nmutations, fs)
 	}
 }
 
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index 6a122bf..e8d84d9 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -171,10 +171,20 @@ func BenchmarkHeapDelta(b *testing.B) {
 	h := newHeapTestHelper()
 	fs := h.generateMemProfileRecords(512, 32)
 	builder := &noopBuilder{}
+	h.rng.Seed(239)
+	nmutations := int(h.rng.Int63() % int64(len(fs)))
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
+		if i == 1000 {
+			v := h.rng.Int63()
+			if v != 7817861117094116717 {
+				b.Errorf("unexpected random value: %d. "+
+					"The bench should be deterministic for better comparision.", v)
+			}
+		}
 		_ = h.dp.WriteHeapProto(builder, fs, int64(runtime.MemProfileRate))
-		h.mutate(fs)
+		h.mutate(nmutations, fs)
 	}
 }
 
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index fc9b082..adf52f3 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -295,8 +295,7 @@ func (h *heapTestHelper) r(AllocObjects, AllocBytes, FreeObjects, FreeBytes int6
 	}
 }
 
-func (h *heapTestHelper) mutate(fs []runtime.MemProfileRecord) {
-	nmutations := int(h.rng.Int63() % int64(len(fs)))
+func (h *heapTestHelper) mutate(nmutations int, fs []runtime.MemProfileRecord) {
 	objSize := fs[0].AllocBytes / fs[0].AllocObjects
 	for j := 0; j < nmutations; j++ {
 		idx := int(uint(h.rng.Int63())) % len(fs)

From 912e33ea286a761677ff88c80dfb007969add751 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Tue, 28 May 2024 14:46:58 +0700
Subject: [PATCH 29/33] fix BenchmarkMutexDelta and BenchmarkMutexCompression
 determinism

---
 godeltaprof/compat/compression_test.go | 11 ++++++++++-
 godeltaprof/compat/delta_test.go       | 11 ++++++++++-
 godeltaprof/compat/testdata.go         |  3 +--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/godeltaprof/compat/compression_test.go b/godeltaprof/compat/compression_test.go
index 592dab8..b5923e3 100644
--- a/godeltaprof/compat/compression_test.go
+++ b/godeltaprof/compat/compression_test.go
@@ -40,11 +40,20 @@ func BenchmarkMutexCompression(b *testing.B) {
 			h := newMutexTestHelper()
 			h.scaler = scaler
 			fs := h.generateBlockProfileRecords(512, 32)
+			h.rng.Seed(239)
+			nmutations := int(h.rng.Int63() % int64(len(fs)))
 			b.ResetTimer()
 
 			for i := 0; i < b.N; i++ {
+				if i == 1000 {
+					v := h.rng.Int63()
+					if v != 7817861117094116717 {
+						b.Errorf("unexpected random value: %d. "+
+							"The bench should be deterministic for better comparision.", v)
+					}
+				}
 				_ = PrintCountCycleProfile(h.dp, h.opt, io.Discard, scaler, fs)
-				h.mutate(fs)
+				h.mutate(nmutations, fs)
 			}
 		})
 
diff --git a/godeltaprof/compat/delta_test.go b/godeltaprof/compat/delta_test.go
index e8d84d9..2822ef4 100644
--- a/godeltaprof/compat/delta_test.go
+++ b/godeltaprof/compat/delta_test.go
@@ -203,11 +203,20 @@ func BenchmarkMutexDelta(b *testing.B) {
 			h.scaler = scaler
 			fs := h.generateBlockProfileRecords(512, 32)
 			builder := &noopBuilder{}
+			h.rng.Seed(239)
+			nmutations := int(h.rng.Int63() % int64(len(fs)))
 			b.ResetTimer()
 
 			for i := 0; i < b.N; i++ {
+				if i == 1000 {
+					v := h.rng.Int63()
+					if v != 7817861117094116717 {
+						b.Errorf("unexpected random value: %d. "+
+							"The bench should be deterministic for better comparision.", v)
+					}
+				}
 				_ = h.dp.PrintCountCycleProfile(builder, scaler, fs)
-				h.mutate(fs)
+				h.mutate(nmutations, fs)
 			}
 		})
 
diff --git a/godeltaprof/compat/testdata.go b/godeltaprof/compat/testdata.go
index adf52f3..be95c9e 100644
--- a/godeltaprof/compat/testdata.go
+++ b/godeltaprof/compat/testdata.go
@@ -246,8 +246,7 @@ func (h *mutexTestHelper) r(count, cycles int64, s [32]uintptr) runtime.BlockPro
 	}
 }
 
-func (h *mutexTestHelper) mutate(fs []runtime.BlockProfileRecord) {
-	nmutations := int(h.rng.Int63() % int64(len(fs)))
+func (h *mutexTestHelper) mutate(nmutations int, fs []runtime.BlockProfileRecord) {
 	oneBlockCycles := fs[0].Cycles / fs[0].Count
 	for j := 0; j < nmutations; j++ {
 		idx := int(uint(h.rng.Int63())) % len(fs)

From 864a4c5b411d42355486300e9aa33224467152b9 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Tue, 28 May 2024 15:23:29 +0700
Subject: [PATCH 30/33] save a bit of memory

---
 godeltaprof/internal/pprof/delta_heap.go | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index fefce10..6f263ce 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -8,14 +8,11 @@ import (
 
 type heapPrevValue struct {
 	allocObjects int64
-	allocBytes   int64
 }
 
 type heapAccValue struct {
 	allocObjects int64
-	allocBytes   int64
 	inuseObjects int64
-	inuseBytes   int64
 }
 
 type DeltaHeapProfiler struct {
@@ -39,9 +36,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		}
 		entry := d.m.Lookup(stack(r.Stack0[:]), uintptr(blockSize))
 		entry.acc.allocObjects += r.AllocObjects
-		entry.acc.allocBytes += r.AllocBytes
 		entry.acc.inuseObjects += r.InUseObjects()
-		entry.acc.inuseBytes += r.InUseBytes()
 	}
 	for i := range p {
 		r := &p[i]
@@ -59,17 +54,15 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 			continue
 		}
 
-		//todo minimize the number of fields - use tag and only keep object count, we can always multiply by block size (tag) to get the bytes
 		AllocObjects := entry.acc.allocObjects - entry.prev.allocObjects
 		if AllocObjects < 0 {
 			continue
 		}
-		AllocBytes := entry.acc.allocBytes - entry.prev.allocBytes
+		AllocBytes := AllocObjects * blockSize
 		entry.prev.allocObjects = entry.acc.allocObjects
-		entry.prev.allocBytes = entry.acc.allocBytes
 
 		values[0], values[1] = ScaleHeapSample(AllocObjects, AllocBytes, rate)
-		values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, entry.acc.inuseBytes, rate)
+		values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, entry.acc.inuseObjects*blockSize, rate)
 
 		entry.acc = heapAccValue{}
 

From 1596d12890378c3d231276c3e897eac508ce7fdb Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Thu, 30 May 2024 13:14:23 +0800
Subject: [PATCH 31/33] fix gotip compilation again

---
 godeltaprof/compat/stub_go23_test.go    | 3 +++
 godeltaprof/internal/pprof/stub_go23.go | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/godeltaprof/compat/stub_go23_test.go b/godeltaprof/compat/stub_go23_test.go
index bf495a9..6c2d227 100644
--- a/godeltaprof/compat/stub_go23_test.go
+++ b/godeltaprof/compat/stub_go23_test.go
@@ -36,6 +36,9 @@ func TestRuntimeCyclesPerSecond(t *testing.T) {
 	checkSignature(t, "runtime",
 		"pprof_cyclesPerSecond",
 		"func runtime.pprof_cyclesPerSecond() int64")
+	checkSignature(t, "runtime/pprof",
+		"pprof_cyclesPerSecond",
+		"func runtime/pprof.pprof_cyclesPerSecond() int64")
 	checkSignature(t, "github.com/grafana/pyroscope-go/godeltaprof/internal/pprof",
 		"runtime_cyclesPerSecond",
 		"func github.com/grafana/pyroscope-go/godeltaprof/internal/pprof.runtime_cyclesPerSecond() int64")
diff --git a/godeltaprof/internal/pprof/stub_go23.go b/godeltaprof/internal/pprof/stub_go23.go
index 37ccbe8..d587cad 100644
--- a/godeltaprof/internal/pprof/stub_go23.go
+++ b/godeltaprof/internal/pprof/stub_go23.go
@@ -23,5 +23,5 @@ func runtime_FrameSymbolName(f *runtime.Frame) string
 //go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
 func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
 
-//go:linkname runtime_cyclesPerSecond runtime.pprof_cyclesPerSecond
+//go:linkname runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
 func runtime_cyclesPerSecond() int64

From eb3517f01391f2eee0ab73f03ecd4fd65986b164 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Mon, 3 Jun 2024 18:56:24 +0800
Subject: [PATCH 32/33] use r.Stack(), add comments avoid blockSize
 multiplications

---
 godeltaprof/internal/pprof/delta_heap.go  | 31 ++++++++++-------------
 godeltaprof/internal/pprof/delta_mutex.go |  6 ++---
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 6f263ce..8bb1a9e 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -34,7 +34,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		if r.AllocObjects > 0 {
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
-		entry := d.m.Lookup(stack(r.Stack0[:]), uintptr(blockSize))
+		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
 		entry.acc.allocObjects += r.AllocObjects
 		entry.acc.inuseObjects += r.InUseObjects()
 	}
@@ -49,20 +49,26 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		if r.AllocObjects > 0 {
 			blockSize = r.AllocBytes / r.AllocObjects
 		}
-		entry := d.m.Lookup(stack(r.Stack0[:]), uintptr(blockSize))
+		entry := d.m.Lookup(r.Stack(), uintptr(blockSize))
 		if entry.acc == (heapAccValue{}) {
 			continue
 		}
 
-		AllocObjects := entry.acc.allocObjects - entry.prev.allocObjects
-		if AllocObjects < 0 {
+		allocObjects := entry.acc.allocObjects - entry.prev.allocObjects
+		if allocObjects < 0 {
 			continue
 		}
-		AllocBytes := AllocObjects * blockSize
+
+		// allocBytes, inuseBytes is calculated as multiplication of number of objects by blockSize
+		// This is done to reduce the size of the map entry (i.e. heapAccValue for deduplication and
+		// heapPrevValue for keeping the delta).
+
+		allocBytes := allocObjects * blockSize
 		entry.prev.allocObjects = entry.acc.allocObjects
+		inuseBytes := entry.acc.inuseObjects * blockSize
 
-		values[0], values[1] = ScaleHeapSample(AllocObjects, AllocBytes, rate)
-		values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, entry.acc.inuseObjects*blockSize, rate)
+		values[0], values[1] = ScaleHeapSample(allocObjects, allocBytes, rate)
+		values[2], values[3] = ScaleHeapSample(entry.acc.inuseObjects, inuseBytes, rate)
 
 		entry.acc = heapAccValue{}
 
@@ -72,7 +78,7 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 
 		hideRuntime := true
 		for tries := 0; tries < 2; tries++ {
-			stk := stack(r.Stack0[:])
+			stk := r.Stack()
 			// For heap profiles, all stack
 			// addresses are return PCs, which is
 			// what appendLocsForStack expects.
@@ -138,12 +144,3 @@ func HeapProfileConfig(rate int64) ProfileConfig {
 		DefaultSampleType: "",
 	}
 }
-
-func stack(stk []uintptr) []uintptr {
-	for i, v := range stk {
-		if v == 0 {
-			return stk[0:i]
-		}
-	}
-	return stk
-}
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index 3fad825..03290e7 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -31,17 +31,17 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 	var locs []uint64
 	for i := range records {
 		r := &records[i]
-		entry := d.m.Lookup(stack(r.Stack0[:]), 0)
+		entry := d.m.Lookup(r.Stack(), 0)
 		entry.acc.count += r.Count // accumulate unscaled
 		entry.acc.cycles += r.Cycles
 	}
 	for i := range records {
 		r := &records[i]
-		stk := stack(r.Stack0[:])
+		stk := r.Stack()
 		entry := d.m.Lookup(stk, 0)
 		accCount := entry.acc.count
 		accCycles := entry.acc.cycles
-		if accCount == 0 && accCycles == 0 { //todo check if this is correct
+		if accCount == 0 && accCycles == 0 {
 			continue
 		}
 		entry.acc = mutexAccValue{}

From 560126f177101cd1b6ca8d14f04dab5279a1b655 Mon Sep 17 00:00:00 2001
From: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Date: Mon, 3 Jun 2024 19:56:27 +0800
Subject: [PATCH 33/33] add more comments

---
 godeltaprof/internal/pprof/delta_heap.go  | 3 ++-
 godeltaprof/internal/pprof/delta_mutex.go | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/godeltaprof/internal/pprof/delta_heap.go b/godeltaprof/internal/pprof/delta_heap.go
index 8bb1a9e..883d020 100644
--- a/godeltaprof/internal/pprof/delta_heap.go
+++ b/godeltaprof/internal/pprof/delta_heap.go
@@ -24,6 +24,7 @@ type DeltaHeapProfiler struct {
 func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProfileRecord, rate int64) error {
 	values := []int64{0, 0, 0, 0}
 	var locs []uint64
+	// deduplicate: accumulate allocObjects and inuseObjects in entry.acc for equal stacks
 	for i := range p {
 		r := &p[i]
 		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
@@ -38,9 +39,9 @@ func (d *DeltaHeapProfiler) WriteHeapProto(b ProfileBuilder, p []runtime.MemProf
 		entry.acc.allocObjects += r.AllocObjects
 		entry.acc.inuseObjects += r.InUseObjects()
 	}
+	// do the delta using the accumulated values and previous values
 	for i := range p {
 		r := &p[i]
-		// do the delta
 		if r.AllocBytes == 0 && r.AllocObjects == 0 && r.FreeObjects == 0 && r.FreeBytes == 0 {
 			// it is a fresh bucket and it will be published after next 1-2 gc cycles
 			continue
diff --git a/godeltaprof/internal/pprof/delta_mutex.go b/godeltaprof/internal/pprof/delta_mutex.go
index 03290e7..5c177e3 100644
--- a/godeltaprof/internal/pprof/delta_mutex.go
+++ b/godeltaprof/internal/pprof/delta_mutex.go
@@ -29,12 +29,15 @@ func (d *DeltaMutexProfiler) PrintCountCycleProfile(b ProfileBuilder, scaler Mut
 
 	values := []int64{0, 0}
 	var locs []uint64
+	// deduplicate: accumulate count and cycles in entry.acc for equal stacks
 	for i := range records {
 		r := &records[i]
 		entry := d.m.Lookup(r.Stack(), 0)
 		entry.acc.count += r.Count // accumulate unscaled
 		entry.acc.cycles += r.Cycles
 	}
+
+	// do the delta using the accumulated values and previous values
 	for i := range records {
 		r := &records[i]
 		stk := r.Stack()