Skip to content

Commit

Permalink
feat: extend avl's implementation to add cow support
Browse files Browse the repository at this point in the history
Signed-off-by: moul <[email protected]>
  • Loading branch information
moul committed Dec 11, 2024
1 parent f8c3537 commit 3d09fd8
Show file tree
Hide file tree
Showing 5 changed files with 458 additions and 29 deletions.
2 changes: 1 addition & 1 deletion examples/gno.land/p/moul/cow/gno.mod
Original file line number Diff line number Diff line change
@@ -1 +1 @@
module gno.land/p/demo/avl/cow
module gno.land/p/moul/cow
20 changes: 13 additions & 7 deletions examples/gno.land/p/moul/cow/node.gno
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,29 @@ func (node *Node) Set(key string, value interface{}) (newSelf *Node, updated boo
return NewNode(key, value), false
}

// Always create a new node for leaf nodes
if node.height == 0 {
if key == node.key {
// Create new node even when updating
return NewNode(key, value), true
}
return node.setLeaf(key, value)
}

node = node._copy()
// Copy the node before modifying
newNode := node._copy()
if key < node.key {
node.leftNode, updated = node.getLeftNode().Set(key, value)
newNode.leftNode, updated = node.getLeftNode().Set(key, value)
} else {
node.rightNode, updated = node.getRightNode().Set(key, value)
newNode.rightNode, updated = node.getRightNode().Set(key, value)
}

if updated {
return node, updated
if !updated {
newNode.calcHeightAndSize()
return newNode.balance(), updated
}

node.calcHeightAndSize()
return node.balance(), updated
return newNode, updated
}

// setLeaf inserts a new leaf node with the given key-value pair into the subtree rooted at the node,
Expand Down
103 changes: 103 additions & 0 deletions examples/gno.land/p/moul/cow/node_test.gno
Original file line number Diff line number Diff line change
Expand Up @@ -553,3 +553,106 @@ func reverseSlice(ss []string) {
ss[i], ss[j] = ss[j], ss[i]
}
}

// TestNodeStructuralSharing checks, by pointer comparison, that Set returns a
// root which still references the previous right subtree when the write goes
// elsewhere, and references a different right node once that side is written.
func TestNodeStructuralSharing(t *testing.T) {
	// seed builds the tree both subtests start from: "B", then "A", then "C".
	seed := func() *Node {
		n := NewNode("B", 2)
		n, _ = n.Set("A", 1)
		n, _ = n.Set("C", 3)
		return n
	}

	t.Run("unmodified paths remain shared", func(t *testing.T) {
		base := seed()
		rightBefore := base.rightNode

		// The write to "A" is expected to leave the right subtree untouched.
		afterWrite, _ := base.Set("A", 10)
		if afterWrite.rightNode != rightBefore {
			t.Error("Unmodified right subtree should remain shared")
		}
	})

	t.Run("multiple modifications reuse shared structure", func(t *testing.T) {
		base := seed()
		rightBefore := base.rightNode

		// Two chained writes: the first on "A", the second on "C".
		afterLeftWrite, _ := base.Set("A", 10)
		afterRightWrite, _ := afterLeftWrite.Set("C", 30)

		// The first version must still point at the original right node.
		if afterLeftWrite.rightNode != rightBefore {
			t.Error("First modification should share unmodified right subtree")
		}

		// The second version must have replaced it.
		if afterRightWrite.rightNode == rightBefore {
			t.Error("Second modification should create new right node")
		}
	})
}

// TestNodeCopyOnWrite exercises the node copy helper (_copy) and checks which
// nodes Remove keeps pointer-identical to the tree it was called on.
func TestNodeCopyOnWrite(t *testing.T) {
	t.Run("copy preserves structure", func(t *testing.T) {
		tree := NewNode("B", 2)
		tree, _ = tree.Set("A", 1)
		tree, _ = tree.Set("C", 3)

		// _copy is only exercised on a non-leaf root; a leaf root ends the
		// subtest without running any check.
		if tree.IsLeaf() {
			return
		}

		dup := tree._copy()
		if dup == tree {
			t.Error("Copy should create new instance")
		}
		if !nodesEqual(dup, tree) {
			t.Error("Copied node should preserve structure")
		}
	})

	t.Run("removal copy pattern", func(t *testing.T) {
		// A fourth key ("D") is added so the right subtree has a right child
		// of its own that can be checked below.
		tree := NewNode("B", 2)
		tree, _ = tree.Set("A", 1)
		tree, _ = tree.Set("C", 3)
		tree, _ = tree.Set("D", 4)

		// Remember the nodes that are expected to survive the removal.
		rightBefore := tree.rightNode
		rightRightBefore := rightBefore.rightNode

		// Removing "A" is expected to touch only the left side.
		afterRemove, _, _, _ := tree.Remove("A")

		if afterRemove.rightNode != rightBefore {
			t.Error("Right subtree should remain shared during removal of left node")
		}
		if afterRemove.rightNode.rightNode != rightRightBefore {
			t.Error("Deep right subtree should remain shared during removal")
		}

		// The tree Remove was called on must still contain "A".
		_, _, stillThere := tree.Get("A")
		if !stillThere {
			t.Error("Original tree should remain unchanged")
		}
	})

	t.Run("copy leaf node panic", func(t *testing.T) {
		leaf := NewNode("A", 1)

		// Registered before the call so that a panic raised by _copy is
		// observed here; finishing without one is the failure case.
		defer func() {
			if recovered := recover(); recovered == nil {
				t.Error("Expected panic when copying leaf node")
			}
		}()

		// Any panic satisfies the test; the panic value is not inspected.
		leaf._copy()
	})
}
129 changes: 109 additions & 20 deletions examples/gno.land/p/moul/cow/tree.gno
Original file line number Diff line number Diff line change
@@ -1,23 +1,80 @@
// Package cow provides a Copy-on-Write (CoW) AVL tree implementation.
//
// Copy-on-Write is an optimization strategy that creates a copy of a data structure
// only when it is modified, while still presenting the appearance of a full copy.
// When a tree is cloned, instead of copying the entire structure, it initially
// shares all its nodes with the original tree. Only when a modification is made
// to either the original or the clone are new nodes created, and only along the
// path from the root to the modified node.
//
// This implementation is based on the standard AVL tree (examples/gno.land/p/demo/avl)
// and fully implements its ITree interface, while adding a Clone() method for CoW
// functionality. The tree maintains all AVL properties (self-balancing, O(log n)
// operations) while providing efficient copying through structural sharing.
//
// Key features:
// - O(1) cloning operation
// - Minimal memory usage through structural sharing
// - Full AVL tree functionality (self-balancing, ordered operations)
// - Thread-safe for concurrent reads of shared structures
//
// Graph Theory Optimization:
// The implementation uses path copying to minimize the number of node copies
// required. When a modification is made, only the nodes along the path
// from the root to the modified node need to be copied, creating a new "branch"
// while maintaining references to unmodified subtrees. This means that for a
// tree of height h, only O(h) nodes (O(log n), since the tree stays balanced)
// need to be copied instead of the entire tree.
//
// Usage Considerations:
// While the CoW mechanism handles structural copying of the tree automatically,
// users need to consider how to handle the values stored in the tree:
//
// 1. Simple Values (int, string, etc.):
// - These are copied by value automatically
// - No additional handling needed
//
// 2. Complex Values (pointers, slices, maps, and structs containing them):
// - Only the reference is copied by default
// - Users must implement their own deep copy mechanism if needed
// - Example:
// type MyValue struct {
// Data []int
// }
//
// func (v *MyValue) DeepCopy() *MyValue {
// newData := make([]int, len(v.Data))
// copy(newData, v.Data)
// return &MyValue{Data: newData}
// }
//
// Example:
//
// // Create original tree
// original := cow.NewTree()
// original.Set("key1", "value1")
//
// // Create a clone - O(1) operation
// clone := original.Clone()
//
// // Modify clone - only affected nodes are copied
// clone.Set("key1", "modified")
//
// // Original remains unchanged
// val, _ := original.Get("key1") // Returns "value1"
//
// This implementation is particularly useful in scenarios where:
// - Multiple versions of a tree need to be maintained
// - Memory efficiency is important
// - Concurrent read access to different versions is needed
// - Temporary modifications need to be tested without affecting the original
//
// Note that while the tree structure itself is CoW, the values stored in the
// tree are not automatically deep-copied. If mutable values are stored in the
// tree, modifications to these values will be visible across all trees sharing
// that node. Users must implement their own deep copy mechanism for values if
// this behavior is not desired.
package cow

// ITree is the method set a tree in this package exposes, split into read-only
// queries and mutating calls.
type ITree interface {
	// --- queries ---

	// Size returns an int describing the tree's size.
	Size() int
	// Has takes a key and returns a bool.
	Has(key string) bool
	// Get returns the value stored under key together with an exists flag.
	Get(key string) (value interface{}, exists bool)
	// GetByIndex returns the key and value found at the given index.
	GetByIndex(index int) (key string, value interface{})
	// Iterate and ReverseIterate take a start and end key plus a callback.
	Iterate(start, end string, cb IterCbFn) bool
	ReverseIterate(start, end string, cb IterCbFn) bool
	// IterateByOffset and ReverseIterateByOffset take an offset and a count
	// plus a callback.
	IterateByOffset(offset, count int, cb IterCbFn) bool
	ReverseIterateByOffset(offset, count int, cb IterCbFn) bool

	// --- mutations ---

	// Set stores value under key and returns an updated flag.
	Set(key string, value interface{}) (updated bool)
	// Remove deletes key and returns the associated value with a removed flag.
	Remove(key string) (value interface{}, removed bool)
}

// IterCbFn is the callback type accepted by the iteration methods of ITree; it
// receives one key/value pair per call.
// NOTE(review): the meaning of the bool result (presumably "stop iterating")
// is not visible in this file - confirm against the iteration implementation.
type IterCbFn func(key string, value interface{}) bool

//----------------------------------------
Expand Down Expand Up @@ -120,5 +177,37 @@ func (tree *Tree) ReverseIterateByOffset(offset int, count int, cb IterCbFn) boo
)
}

// Compile-time check that *Tree implements ITree: assigning a nil *Tree to a
// blank ITree variable fails to build if any interface method is missing.
var _ ITree = (*Tree)(nil)
// Equal reports whether tree and other have identical structure and values.
// The two root nodes are handed to nodesEqual, which compares key, value,
// height and size of every node recursively.
//
// Nil trees are handled without panicking: two nil trees are equal, and a nil
// tree is never equal to a non-nil one (not even to an empty one).
func (tree *Tree) Equal(other *Tree) bool {
	// Guard before touching .node so a nil receiver or argument cannot cause
	// a nil-pointer dereference.
	if tree == nil || other == nil {
		return tree == other
	}
	return nodesEqual(tree.node, other.node)
}

// nodesEqual reports whether the subtrees rooted at a and b match node for
// node: the same key, value, height and size at every position.
//
// Identical pointers (including two nils) compare equal immediately, so a
// subtree shared by both sides is not walked. Otherwise both children are
// compared recursively, which can visit every node (O(n) time) with a
// recursion depth bounded by the tree height (O(h) stack). This is expensive;
// use it in tests or only when a deep comparison is really required.
//
// NOTE(review): values are compared with != on interface{}; in Go that panics
// at run time when both sides hold the same non-comparable dynamic type
// (slice, map, func) - confirm the behaviour for the values stored here.
func nodesEqual(a, b *Node) bool {
	switch {
	case a == b:
		// Same node, or both nil: nothing below can differ.
		return true
	case a == nil, b == nil:
		// Exactly one side is missing.
		return false
	case a.key != b.key,
		a.value != b.value,
		a.height != b.height,
		a.size != b.size:
		// The nodes themselves differ; no need to look at the children.
		return false
	}
	return nodesEqual(a.leftNode, b.leftNode) && nodesEqual(a.rightNode, b.rightNode)
}

// Clone returns a new Tree value that points at the same root node as the
// receiver. No node is copied, so the call does a constant amount of work.
// Calling Clone on a nil *Tree returns nil.
func (tree *Tree) Clone() *Tree {
	if tree != nil {
		// Only the root pointer is carried over; both trees share every node.
		dup := new(Tree)
		dup.node = tree.node
		return dup
	}
	return nil
}
Loading

0 comments on commit 3d09fd8

Please sign in to comment.