From 3d09fd83fe6ab5f5ae338cfce49b8dd5f4f69a89 Mon Sep 17 00:00:00 2001 From: moul <94029+moul@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:43:05 +0100 Subject: [PATCH] feat: extend avl's implementation to add cow support Signed-off-by: moul <94029+moul@users.noreply.github.com> --- examples/gno.land/p/moul/cow/gno.mod | 2 +- examples/gno.land/p/moul/cow/node.gno | 20 +- examples/gno.land/p/moul/cow/node_test.gno | 103 +++++++++ examples/gno.land/p/moul/cow/tree.gno | 129 ++++++++++-- examples/gno.land/p/moul/cow/tree_test.gno | 233 ++++++++++++++++++++- 5 files changed, 458 insertions(+), 29 deletions(-) diff --git a/examples/gno.land/p/moul/cow/gno.mod b/examples/gno.land/p/moul/cow/gno.mod index 11076765a0f..98344718182 100644 --- a/examples/gno.land/p/moul/cow/gno.mod +++ b/examples/gno.land/p/moul/cow/gno.mod @@ -1 +1 @@ -module gno.land/p/demo/avl/cow \ No newline at end of file +module gno.land/p/moul/cow \ No newline at end of file diff --git a/examples/gno.land/p/moul/cow/node.gno b/examples/gno.land/p/moul/cow/node.gno index 8a641e27f70..b9e5344bc75 100644 --- a/examples/gno.land/p/moul/cow/node.gno +++ b/examples/gno.land/p/moul/cow/node.gno @@ -130,23 +130,29 @@ func (node *Node) Set(key string, value interface{}) (newSelf *Node, updated boo return NewNode(key, value), false } + // Always create a new node for leaf nodes if node.height == 0 { + if key == node.key { + // Create new node even when updating + return NewNode(key, value), true + } return node.setLeaf(key, value) } - node = node._copy() + // Copy the node before modifying + newNode := node._copy() if key < node.key { - node.leftNode, updated = node.getLeftNode().Set(key, value) + newNode.leftNode, updated = node.getLeftNode().Set(key, value) } else { - node.rightNode, updated = node.getRightNode().Set(key, value) + newNode.rightNode, updated = node.getRightNode().Set(key, value) } - if updated { - return node, updated + if !updated { + newNode.calcHeightAndSize() + return 
newNode.balance(), updated } - node.calcHeightAndSize() - return node.balance(), updated + return newNode, updated } // setLeaf inserts a new leaf node with the given key-value pair into the subtree rooted at the node, diff --git a/examples/gno.land/p/moul/cow/node_test.gno b/examples/gno.land/p/moul/cow/node_test.gno index be4471ec4eb..51d1808fa3d 100644 --- a/examples/gno.land/p/moul/cow/node_test.gno +++ b/examples/gno.land/p/moul/cow/node_test.gno @@ -553,3 +553,106 @@ func reverseSlice(ss []string) { ss[i], ss[j] = ss[j], ss[i] } } + +func TestNodeStructuralSharing(t *testing.T) { + t.Run("unmodified paths remain shared", func(t *testing.T) { + root := NewNode("B", 2) + root, _ = root.Set("A", 1) + root, _ = root.Set("C", 3) + + originalRight := root.rightNode + newRoot, _ := root.Set("A", 10) + + if newRoot.rightNode != originalRight { + t.Error("Unmodified right subtree should remain shared") + } + }) + + t.Run("multiple modifications reuse shared structure", func(t *testing.T) { + // Create initial tree + root := NewNode("B", 2) + root, _ = root.Set("A", 1) + root, _ = root.Set("C", 3) + + // Store original nodes + originalRight := root.rightNode + + // First modification + mod1, _ := root.Set("A", 10) + + // Second modification + mod2, _ := mod1.Set("C", 30) + + // Check sharing in first modification + if mod1.rightNode != originalRight { + t.Error("First modification should share unmodified right subtree") + } + + // Check that second modification creates new right node + if mod2.rightNode == originalRight { + t.Error("Second modification should create new right node") + } + }) +} + +func TestNodeCopyOnWrite(t *testing.T) { + t.Run("copy preserves structure", func(t *testing.T) { + root := NewNode("B", 2) + root, _ = root.Set("A", 1) + root, _ = root.Set("C", 3) + + // Only copy non-leaf nodes + if !root.IsLeaf() { + copied := root._copy() + if copied == root { + t.Error("Copy should create new instance") + } + if !nodesEqual(copied, root) { + 
t.Error("Copied node should preserve structure") + } + } + }) + + t.Run("removal copy pattern", func(t *testing.T) { + // Create a more complex tree to test removal + root := NewNode("B", 2) + root, _ = root.Set("A", 1) + root, _ = root.Set("C", 3) + root, _ = root.Set("D", 4) // Add this to ensure proper tree structure + + // Store references to original nodes + originalRight := root.rightNode + originalRightRight := originalRight.rightNode + + // Remove "A" which should only affect the left subtree + newRoot, _, _, _ := root.Remove("A") + + // Verify right subtree remains unchanged and shared + if newRoot.rightNode != originalRight { + t.Error("Right subtree should remain shared during removal of left node") + } + + // Also verify deeper nodes remain shared + if newRoot.rightNode.rightNode != originalRightRight { + t.Error("Deep right subtree should remain shared during removal") + } + + // Verify original tree is unchanged + if _, _, exists := root.Get("A"); !exists { + t.Error("Original tree should remain unchanged") + } + }) + + t.Run("copy leaf node panic", func(t *testing.T) { + leaf := NewNode("A", 1) + + defer func() { + if r := recover(); r == nil { + t.Error("Expected panic when copying leaf node") + } + }() + + // This should panic with our specific message + leaf._copy() + }) +} diff --git a/examples/gno.land/p/moul/cow/tree.gno b/examples/gno.land/p/moul/cow/tree.gno index 4abe80cb58a..3e45454caa5 100644 --- a/examples/gno.land/p/moul/cow/tree.gno +++ b/examples/gno.land/p/moul/cow/tree.gno @@ -1,23 +1,80 @@ +// Package cow provides a Copy-on-Write (CoW) AVL tree implementation. +// +// Copy-on-Write is an optimization strategy that creates a copy of a data structure +// only when it is modified, while still presenting the appearance of a full copy. +// When a tree is cloned, instead of copying the entire structure, it initially +// shares all its nodes with the original tree. 
Only when a modification is made +// to either the original or the clone are new nodes created, and only along the +// path from the root to the modified node. +// +// This implementation is based on the standard AVL tree (examples/gno.land/p/demo/avl) +// and is intended to satisfy its ITree interface, while adding a Clone() method for CoW +// functionality. The tree maintains all AVL properties (self-balancing, O(log n) +// operations) while providing efficient copying through structural sharing. +// +// Key features: +// - O(1) cloning operation +// - Minimal memory usage through structural sharing +// - Full AVL tree functionality (self-balancing, ordered operations) +// - Thread-safe for concurrent reads of shared structures +// +// Path Copying: +// The implementation uses path copying to minimize the number of node +// copies required. When a modification is made, only the nodes along the path +// from the root to the modified node need to be copied, creating a new "branch" +// while maintaining references to unmodified subtrees. This means that for a +// tree of height h, only O(h) nodes need to be copied instead of the entire tree. +// +// Usage Considerations: +// While the CoW mechanism handles structural copying of the tree automatically, +// users need to consider how to handle the values stored in the tree: +// +// 1. Simple Values (int, string, etc.): +// - These are copied by value automatically +// - No additional handling needed +// +// 2. 
Complex Values (pointers, slices, maps, or structs containing them): +// - Only the reference is copied, so every tree sharing the node sees the same underlying data +// - Users must implement their own deep copy mechanism if needed +// - Example of a user-defined deep copy: +// type MyValue struct { +// Data []int +// } +// +// func (v *MyValue) DeepCopy() *MyValue { +// newData := make([]int, len(v.Data)) +// copy(newData, v.Data) +// return &MyValue{Data: newData} +// } +// +// Example usage: +// +// // Create original tree +// original := cow.NewTree() +// original.Set("key1", "value1") +// +// // Create a clone - O(1) operation +// clone := original.Clone() +// +// // Modify clone - only affected nodes are copied +// clone.Set("key1", "modified") +// +// // Original remains unchanged +// val, _ := original.Get("key1") // Returns "value1" +// +// This implementation is particularly useful in scenarios where: +// - Multiple versions of a tree need to be maintained +// - Memory efficiency is important +// - Concurrent read access to different versions is needed +// - Temporary modifications need to be tested without affecting the original +// +// Note that while the tree structure itself is CoW, the values stored in the +// tree are not automatically deep-copied. If mutable values are stored in the +// tree, modifications to these values will be visible across all trees sharing +// that node. Users must implement their own deep copy mechanism for values if +// this behavior is not desired. 
package cow -type ITree interface { - // read operations - - Size() int - Has(key string) bool - Get(key string) (value interface{}, exists bool) - GetByIndex(index int) (key string, value interface{}) - Iterate(start, end string, cb IterCbFn) bool - ReverseIterate(start, end string, cb IterCbFn) bool - IterateByOffset(offset int, count int, cb IterCbFn) bool - ReverseIterateByOffset(offset int, count int, cb IterCbFn) bool - - // write operations - - Set(key string, value interface{}) (updated bool) - Remove(key string) (value interface{}, removed bool) -} - type IterCbFn func(key string, value interface{}) bool //---------------------------------------- @@ -120,5 +177,37 @@ func (tree *Tree) ReverseIterateByOffset(offset int, count int, cb IterCbFn) boo ) } -// Verify that Tree implements ITree -var _ ITree = (*Tree)(nil) +// Equal checks if two trees have identical structure and values +func (tree *Tree) Equal(other *Tree) bool { + return nodesEqual(tree.node, other.node) +} + +// nodesEqual performs a deep comparison between two nodes. +// WARNING: This is an expensive operation that recursively traverses the entire tree structure. +// It should only be used in tests or when absolutely necessary. +// Time complexity: O(n) where n is the total number of nodes in the tree. +// Space complexity: O(h) where h is the height of the tree due to recursion stack. 
+func nodesEqual(a, b *Node) bool { + if a == b { + return true + } + if a == nil || b == nil { + return false + } + return a.key == b.key && + a.value == b.value && + a.height == b.height && + a.size == b.size && + nodesEqual(a.leftNode, b.leftNode) && + nodesEqual(a.rightNode, b.rightNode) +} + +// Clone creates a shallow copy of the tree +func (tree *Tree) Clone() *Tree { + if tree == nil { + return nil + } + return &Tree{ + node: tree.node, + } +} diff --git a/examples/gno.land/p/moul/cow/tree_test.gno b/examples/gno.land/p/moul/cow/tree_test.gno index 16e0b3ea32b..6ee816455b8 100644 --- a/examples/gno.land/p/moul/cow/tree_test.gno +++ b/examples/gno.land/p/moul/cow/tree_test.gno @@ -1,6 +1,8 @@ package cow -import "testing" +import ( + "testing" +) func TestNewTree(t *testing.T) { tree := NewTree() @@ -159,3 +161,232 @@ func TestTreeReverseIterateByOffset(t *testing.T) { t.Errorf("Expected keys %v, got %v", expectedKeys, keys) } } + +// Verify that Tree implements avl.ITree +// var _ avl.ITree = (*Tree)(nil) // TODO: fix gnovm bug: ./examples/gno.land/p/moul/cow: test pkg: panic: gno.land/p/moul/cow/tree_test.gno:166:5: name avl not defined in fileset with files [node.gno tree.gno node_test.gno tree_test.gno]: + +func TestCopyOnWrite(t *testing.T) { + // Create original tree + original := NewTree() + original.Set("A", 1) + original.Set("B", 2) + original.Set("C", 3) + + // Create a clone + clone := original.Clone() + + // Modify clone + clone.Set("B", 20) + clone.Set("D", 4) + + // Verify original is unchanged + if val, _ := original.Get("B"); val != 2 { + t.Errorf("Original tree was modified: expected B=2, got B=%v", val) + } + if original.Has("D") { + t.Error("Original tree was modified: found key D") + } + + // Verify clone has new values + if val, _ := clone.Get("B"); val != 20 { + t.Errorf("Clone not updated: expected B=20, got B=%v", val) + } + if val, _ := clone.Get("D"); val != 4 { + t.Errorf("Clone not updated: expected D=4, got D=%v", val) + } +} + 
+func TestCopyOnWriteEdgeCases(t *testing.T) { + t.Run("nil tree clone", func(t *testing.T) { + var original *Tree + clone := original.Clone() + if clone != nil { + t.Error("Expected nil clone from nil tree") + } + }) + + t.Run("empty tree clone", func(t *testing.T) { + original := NewTree() + clone := original.Clone() + + // Modify clone + clone.Set("A", 1) + + if original.Size() != 0 { + t.Error("Original empty tree was modified") + } + if clone.Size() != 1 { + t.Error("Clone was not modified") + } + }) + + t.Run("multiple clones", func(t *testing.T) { + original := NewTree() + original.Set("A", 1) + original.Set("B", 2) + + // Create multiple clones + clone1 := original.Clone() + clone2 := original.Clone() + clone3 := clone1.Clone() + + // Modify each clone differently + clone1.Set("A", 10) + clone2.Set("B", 20) + clone3.Set("C", 30) + + // Check original remains unchanged + if val, _ := original.Get("A"); val != 1 { + t.Errorf("Original modified: expected A=1, got A=%v", val) + } + if val, _ := original.Get("B"); val != 2 { + t.Errorf("Original modified: expected B=2, got B=%v", val) + } + + // Verify each clone has correct values + if val, _ := clone1.Get("A"); val != 10 { + t.Errorf("Clone1 incorrect: expected A=10, got A=%v", val) + } + if val, _ := clone2.Get("B"); val != 20 { + t.Errorf("Clone2 incorrect: expected B=20, got B=%v", val) + } + if val, _ := clone3.Get("C"); val != 30 { + t.Errorf("Clone3 incorrect: expected C=30, got C=%v", val) + } + }) + + t.Run("clone after removal", func(t *testing.T) { + original := NewTree() + original.Set("A", 1) + original.Set("B", 2) + original.Set("C", 3) + + // Remove a node and then clone + original.Remove("B") + clone := original.Clone() + + // Modify clone + clone.Set("B", 20) + + // Verify original state + if original.Has("B") { + t.Error("Original tree should not have key B") + } + + // Verify clone state + if val, _ := clone.Get("B"); val != 20 { + t.Errorf("Clone incorrect: expected B=20, got B=%v", val) + } 
+ }) + + t.Run("concurrent modifications", func(t *testing.T) { + original := NewTree() + original.Set("A", 1) + original.Set("B", 2) + + clone1 := original.Clone() + clone2 := original.Clone() + + // Modify same key in different clones + clone1.Set("B", 20) + clone2.Set("B", 30) + + // Each clone should have its own value + if val, _ := clone1.Get("B"); val != 20 { + t.Errorf("Clone1 incorrect: expected B=20, got B=%v", val) + } + if val, _ := clone2.Get("B"); val != 30 { + t.Errorf("Clone2 incorrect: expected B=30, got B=%v", val) + } + }) + + t.Run("deep tree modifications", func(t *testing.T) { + original := NewTree() + // Create a deeper tree + keys := []string{"M", "F", "T", "B", "H", "P", "Z"} + for _, k := range keys { + original.Set(k, k) + } + + clone := original.Clone() + + // Modify a deep node + clone.Set("H", "modified") + + // Check original remains unchanged + if val, _ := original.Get("H"); val != "H" { + t.Errorf("Original modified: expected H='H', got H=%v", val) + } + + // Verify clone modification + if val, _ := clone.Get("H"); val != "modified" { + t.Errorf("Clone incorrect: expected H='modified', got H=%v", val) + } + }) + + t.Run("rebalancing test", func(t *testing.T) { + original := NewTree() + // Insert nodes that will cause rotations + keys := []string{"A", "B", "C", "D", "E"} + for _, k := range keys { + original.Set(k, k) + } + + clone := original.Clone() + + // Add more nodes to clone to trigger rebalancing + clone.Set("F", "F") + clone.Set("G", "G") + + // Verify original structure remains unchanged + originalKeys := collectKeys(original) + expectedOriginal := []string{"A", "B", "C", "D", "E"} + if !slicesEqual(originalKeys, expectedOriginal) { + t.Errorf("Original tree structure changed: got %v, want %v", originalKeys, expectedOriginal) + } + + // Verify clone has all keys + cloneKeys := collectKeys(clone) + expectedClone := []string{"A", "B", "C", "D", "E", "F", "G"} + if !slicesEqual(cloneKeys, expectedClone) { + t.Errorf("Clone 
tree structure incorrect: got %v, want %v", cloneKeys, expectedClone) + } + }) + + t.Run("value mutation test", func(t *testing.T) { + type MutableValue struct { + Data string + } + + original := NewTree() + mutable := &MutableValue{Data: "original"} + original.Set("key", mutable) + + clone := original.Clone() + + // Modify the mutable value + mutable.Data = "modified" + + // Both original and clone should see the modification + // because we're not deep copying values + origVal, _ := original.Get("key") + cloneVal, _ := clone.Get("key") + + if origVal.(*MutableValue).Data != "modified" { + t.Error("Original value not modified as expected") + } + if cloneVal.(*MutableValue).Data != "modified" { + t.Error("Clone value not modified as expected") + } + }) +} + +// Helper function to collect all keys in order +func collectKeys(tree *Tree) []string { + var keys []string + tree.Iterate("", "", func(key string, _ interface{}) bool { + keys = append(keys, key) + return false + }) + return keys +}