diff --git a/Batteries/Data/HashMap.lean b/Batteries/Data/HashMap.lean index 96632fdf28..62f9dac6f2 100644 --- a/Batteries/Data/HashMap.lean +++ b/Batteries/Data/HashMap.lean @@ -1,3 +1,2 @@ import Batteries.Data.HashMap.Basic import Batteries.Data.HashMap.Lemmas -import Batteries.Data.HashMap.WF diff --git a/Batteries/Data/HashMap/Basic.lean b/Batteries/Data/HashMap/Basic.lean index 0d1b2b1fe1..42eb49fde3 100644 --- a/Batteries/Data/HashMap/Basic.lean +++ b/Batteries/Data/HashMap/Basic.lean @@ -3,9 +3,7 @@ Copyright (c) 2018 Microsoft Corporation. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Leonardo de Moura, Mario Carneiro -/ -import Batteries.Data.AssocList -import Batteries.Data.Nat.Basic -import Batteries.Data.Array.Monadic +import Std.Data.HashMap namespace Batteries.HashMap @@ -14,232 +12,6 @@ class LawfulHashable (α : Type _) [BEq α] [Hashable α] : Prop where /-- Two elements which compare equal under the `BEq` instance have equal hash. -/ hash_eq {a b : α} : a == b → hash a = hash b -namespace Imp - -/-- -The bucket array of a `HashMap` is a nonempty array of `AssocList`s. -(This type is an internal implementation detail of `HashMap`.) --/ -def Buckets (α : Type u) (β : Type v) := {b : Array (AssocList α β) // 0 < b.size} - -namespace Buckets - -/-- Construct a new empty bucket array with the specified capacity. -/ -def mk (buckets := 8) (h : 0 < buckets := by decide) : Buckets α β := - ⟨mkArray buckets .nil, by simp [h]⟩ - -/-- Update one bucket in the bucket array with a new value. -/ -def update (data : Buckets α β) (i : USize) - (d : AssocList α β) (h : i.toNat < data.1.size) : Buckets α β := - ⟨data.1.uset i d h, (Array.size_uset ..).symm ▸ data.2⟩ - -/-- -The number of elements in the bucket array. -Note: this is marked `noncomputable` because it is only intended for specification. --/ -noncomputable def size (data : Buckets α β) : Nat := (data.1.toList.map (·.toList.length)).sum - -@[simp] theorem update_size (self : Buckets α β) (i d h) : - (self.update i d h).1.size = self.1.size := Array.size_uset _ _ _ h - -/-- Map a function over the values in the map. -/ -@[specialize] def mapVal (f : α → β → γ) (self : Buckets α β) : Buckets α γ := - ⟨self.1.map (.mapVal f), by simp [self.2]⟩ - -/-- -The well-formedness invariant for the bucket array says that every element hashes to its index -(assuming the hash is lawful - otherwise there are no promises about where elements are located). --/ -structure WF [BEq α] [Hashable α] (buckets : Buckets α β) : Prop where - /-- The elements of a bucket are all distinct according to the `BEq` relation. -/ - distinct [LawfulHashable α] [PartialEquivBEq α] : ∀ bucket ∈ buckets.1.toList, - bucket.toList.Pairwise fun a b => ¬(a.1 == b.1) - /-- Every element in a bucket should hash to its location. -/ - hash_self (i : Nat) (h : i < buckets.1.size) : - buckets.1[i].All fun k _ => ((hash k).toUSize % USize.ofNat buckets.1.size).toNat = i - -end Buckets -end Imp - -/-- `HashMap.Imp α β` is the internal implementation type of `HashMap α β`. -/ -structure Imp (α : Type u) (β : Type v) where - /-- The number of elements stored in the `HashMap`. - We cache this both so that we can implement `.size` in `O(1)`, and also because we - use the size to determine when to resize the map. -/ - size : Nat - /-- The bucket array of the `HashMap`. -/ - buckets : Imp.Buckets α β - -namespace Imp - -/-- -Given a desired capacity, this returns the number of buckets we should reserve. -A "load factor" of 0.75 is the usual standard for hash maps, so we return `capacity * 4 / 3`. --/ -@[inline] def numBucketsForCapacity (capacity : Nat) : Nat := - capacity * 4 / 3 - -/-- Constructs an empty hash map with the specified nonzero number of buckets. -/ -@[inline] def empty' (buckets := 8) (h : 0 < buckets := by decide) : Imp α β := - ⟨0, .mk buckets h⟩ - -/-- Constructs an empty hash map with the specified target capacity. -/ -def empty (capacity := 0) : Imp α β := - let nbuckets := numBucketsForCapacity capacity - let n : {n : Nat // 0 < n} := - if h : nbuckets = 0 then ⟨8, by decide⟩ - else ⟨nbuckets, Nat.zero_lt_of_ne_zero h⟩ - empty' n n.2 - -/-- Calculates the bucket index from a hash value `u`. -/ -def mkIdx {n : Nat} (h : 0 < n) (u : USize) : {u : USize // u.toNat < n} := - ⟨u % USize.ofNat n, USize.toNat_mod_lt _ h⟩ - -/-- -Inserts a key-value pair into the bucket array. This function assumes that the data is not -already in the array, which is appropriate when reinserting elements into the array after a resize. --/ -@[inline] def reinsertAux [Hashable α] - (data : Buckets α β) (a : α) (b : β) : Buckets α β := - let ⟨i, h⟩ := mkIdx data.2 (hash a |>.toUSize) - data.update i (.cons a b data.1[i]) h - -/-- Folds a monadic function over the elements in the map (in arbitrary order). -/ -@[inline] def foldM [Monad m] (f : δ → α → β → m δ) (d : δ) (map : Imp α β) : m δ := - map.buckets.1.foldlM (init := d) fun d b => b.foldlM f d - -/-- Folds a function over the elements in the map (in arbitrary order). -/ -@[inline] def fold (f : δ → α → β → δ) (d : δ) (m : Imp α β) : δ := - Id.run $ foldM f d m - -/-- Runs a monadic function over the elements in the map (in arbitrary order). -/ -@[inline] def forM [Monad m] (f : α → β → m PUnit) (h : Imp α β) : m PUnit := - h.buckets.1.forM fun b => b.forM f - -/-- Given a key `a`, returns a key-value pair in the map whose key compares equal to `a`. -/ -def findEntry? [BEq α] [Hashable α] (m : Imp α β) (a : α) : Option (α × β) := - let ⟨_, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - buckets.1[i].findEntry? a - -/-- Looks up an element in the map with key `a`. -/ -def find? [BEq α] [Hashable α] (m : Imp α β) (a : α) : Option β := - let ⟨_, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - buckets.1[i].find? a - -/-- Returns true if the element `a` is in the map. -/ -def contains [BEq α] [Hashable α] (m : Imp α β) (a : α) : Bool := - let ⟨_, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - buckets.1[i].contains a - -/-- Copies all the entries from `buckets` into a new hash map with a larger capacity. -/ -def expand [Hashable α] (size : Nat) (buckets : Buckets α β) : Imp α β := - let nbuckets := buckets.1.size * 2 - { size, buckets := go 0 buckets.1 (.mk nbuckets (Nat.mul_pos buckets.2 (by decide))) } -where - /-- Inner loop of `expand`. Copies elements `source[i:]` into `target`, - destroying `source` in the process. -/ - go (i : Nat) (source : Array (AssocList α β)) (target : Buckets α β) : Buckets α β := - if h : i < source.size then - let es := source[i] - -- We remove `es` from `source` to make sure we can reuse its memory cells - -- when performing es.foldl - let source := source.set i .nil - let target := es.foldl reinsertAux target - go (i+1) source target - else target - termination_by source.size - i - -/-- -Inserts key-value pair `a, b` into the map. -If an element equal to `a` is already in the map, it is replaced by `b`. --/ -@[inline] def insert [BEq α] [Hashable α] (m : Imp α β) (a : α) (b : β) : Imp α β := - let ⟨size, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - let bkt := buckets.1[i] - bif bkt.contains a then - ⟨size, buckets.update i (bkt.replace a b) h⟩ - else - let size' := size + 1 - let buckets' := buckets.update i (.cons a b bkt) h - if numBucketsForCapacity size' ≤ buckets.1.size then - { size := size', buckets := buckets' } - else - expand size' buckets' - -/-- -Removes key `a` from the map. If it does not exist in the map, the map is returned unchanged. --/ -def erase [BEq α] [Hashable α] (m : Imp α β) (a : α) : Imp α β := - let ⟨size, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - let bkt := buckets.1[i] - bif bkt.contains a then ⟨size - 1, buckets.update i (bkt.erase a) h⟩ else ⟨size, buckets⟩ - -/-- Map a function over the values in the map. -/ -@[inline] def mapVal (f : α → β → γ) (self : Imp α β) : Imp α γ := - { size := self.size, buckets := self.buckets.mapVal f } - -/-- Performs an in-place edit of the value, ensuring that the value is used linearly. -/ -def modify [BEq α] [Hashable α] (m : Imp α β) (a : α) (f : α → β → β) : Imp α β := - let ⟨size, buckets⟩ := m - let ⟨i, h⟩ := mkIdx buckets.2 (hash a |>.toUSize) - let bkt := buckets.1[i] - let buckets := buckets.update i .nil h -- for linearity - ⟨size, buckets.update i (bkt.modify a f) ((Buckets.update_size ..).symm ▸ h)⟩ - -/-- -Applies `f` to each key-value pair `a, b` in the map. If it returns `some c` then -`a, c` is pushed into the new map; else the key is removed from the map. --/ -@[specialize] def filterMap {α : Type u} {β : Type v} {γ : Type w} - (f : α → β → Option γ) (m : Imp α β) : Imp α γ := - let m' := m.buckets.1.mapM (m := StateT (ULift Nat) Id) (go .nil) |>.run ⟨0⟩ |>.run - have : m'.1.size > 0 := by - have := Array.size_mapM (m := StateT (ULift Nat) Id) (go .nil) m.buckets.1 - simp [SatisfiesM_StateT_eq, SatisfiesM_Id_eq] at this - simp [this, Id.run, m.2.2, m'] - ⟨m'.2.1, m'.1, this⟩ -where - /-- Inner loop of `filterMap`. Note that this reverses the bucket lists, - but this is fine since bucket lists are unordered. -/ - @[specialize] go (acc : AssocList α γ) : AssocList α β → ULift Nat → AssocList α γ × ULift Nat - | .nil, n => (acc, n) - | .cons a b l, n => match f a b with - | none => go acc l n - | some c => go (.cons a c acc) l ⟨n.1 + 1⟩ - -/-- Constructs a map with the set of all pairs `a, b` such that `f` returns true. -/ -@[inline] def filter (f : α → β → Bool) (m : Imp α β) : Imp α β := - m.filterMap fun a b => bif f a b then some b else none - -/-- -The well-formedness invariant for a hash map. The first constructor is the real invariant, -and the others allow us to "cheat" in this file and define `insert` and `erase`, -which have more complex proofs that are delayed to `Batteries.Data.HashMap.Lemmas`. --/ -inductive WF [BEq α] [Hashable α] : Imp α β → Prop where - /-- The real well-formedness invariant: - * The `size` field should match the actual number of elements in the map - * The bucket array should be well-formed, meaning that if the hashable instance - is lawful then every element hashes to its index. -/ - | mk : m.size = m.buckets.size → m.buckets.WF → WF m - /-- The empty hash map is well formed. -/ - | empty' : WF (empty' n h) - /-- Inserting into a well formed hash map yields a well formed hash map. -/ - | insert : WF m → WF (insert m a b) - /-- Removing an element from a well formed hash map yields a well formed hash map. -/ - | erase : WF m → WF (erase m a) - /-- Replacing an element in a well formed hash map yields a well formed hash map. -/ - | modify : WF m → WF (modify m a f) - -theorem WF.empty [BEq α] [Hashable α] : WF (empty n : Imp α β) := empty' - -end Imp - /-- `HashMap α β` is a key-value map which stores elements in an array using a hash function to find the values. This allows it to have very good performance for lookups @@ -247,9 +19,7 @@ to find the values. This allows it to have very good performance for lookups meaning that one should take care to use the map linearly when performing updates. Copies are `O(n)`. -/ -def _root_.Batteries.HashMap (α : Type u) (β : Type v) [BEq α] [Hashable α] := {m : Imp α β // m.WF} - -open HashMap.Imp +abbrev _root_.Batteries.HashMap (α : Type u) (β : Type v) [BEq α] [Hashable α] := Std.HashMap α β /-- Make a new hash map with the specified capacity. -/ @[inline] def _root_.Batteries.mkHashMap [BEq α] [Hashable α] (capacity := 0) : HashMap α β := @@ -276,7 +46,7 @@ The number of elements in the hash map. (ofList [("one", 1), ("two", 2)]).size = 2 ``` -/ -@[inline] def size (self : HashMap α β) : Nat := self.1.size +@[inline] def size (self : HashMap α β) : Nat := Std.HashMap.size self /-- Is the map empty? @@ -285,7 +55,7 @@ Is the map empty? (ofList [("one", 1), ("two", 2)]).isEmpty = false ``` -/ -@[inline] def isEmpty (self : HashMap α β) : Bool := self.size = 0 +@[inline] def isEmpty (self : HashMap α β) : Bool := Std.HashMap.isEmpty self /-- Inserts key-value pair `a, b` into the map. @@ -296,7 +66,8 @@ hashMap.insert "three" 3 = {"one" => 1, "two" => 2, "three" => 3} hashMap.insert "two" 0 = {"one" => 1, "two" => 0} ``` -/ -def insert (self : HashMap α β) (a : α) (b : β) : HashMap α β := ⟨self.1.insert a b, self.2.insert⟩ +@[inline] def insert (self : HashMap α β) (a : α) (b : β) : HashMap α β := + Std.HashMap.insert self a b /-- Similar to `insert`, but also returns a boolean flag indicating whether an existing entry has been @@ -308,10 +79,7 @@ hashMap.insert' "two" 0 = ({"one" => 1, "two" => 0}, true) ``` -/ @[inline] def insert' (m : HashMap α β) (a : α) (b : β) : HashMap α β × Bool := - let old := m.size - let m' := m.insert a b - let replaced := old == m'.size - (m', replaced) + Prod.swap <|Std.HashMap.containsThenInsert m a b /-- Removes key `a` from the map. If it does not exist in the map, the map is returned unchanged. @@ -321,7 +89,7 @@ hashMap.erase "one" = {"two" => 2} hashMap.erase "three" = {"one" => 1, "two" => 2} ``` -/ -@[inline] def erase (self : HashMap α β) (a : α) : HashMap α β := ⟨self.1.erase a, self.2.erase⟩ +@[inline] def erase (self : HashMap α β) (a : α) : HashMap α β := Std.HashMap.erase self a /-- Performs an in-place edit of the value, ensuring that the value is used linearly. @@ -331,20 +99,8 @@ The function `f` is passed the original key of the entry, along with the value i (ofList [("one", 1), ("two", 2)]).modify "three" (fun _ v => v + 1) = {"one" => 1, "two" => 2} ``` -/ -def modify (self : HashMap α β) (a : α) (f : α → β → β) : HashMap α β := - ⟨self.1.modify a f, self.2.modify⟩ - -/-- -Given a key `a`, returns a key-value pair in the map whose key compares equal to `a`. -Note that the returned key may not be identical to the input, if `==` ignores some part -of the value. -``` -def hashMap := ofList [("one", 1), ("two", 2)] -hashMap.findEntry? "one" = some ("one", 1) -hashMap.findEntry? "three" = none -``` --/ -@[inline] def findEntry? (self : HashMap α β) (a : α) : Option (α × β) := self.1.findEntry? a +@[inline] def modify (self : HashMap α β) (a : α) (f : α → β → β) : HashMap α β := + Std.HashMap.modify self a (f a) /-- Looks up an element in the map with key `a`. @@ -354,7 +110,7 @@ hashMap.find? "one" = some 1 hashMap.find? "three" = none ``` -/ -@[inline] def find? (self : HashMap α β) (a : α) : Option β := self.1.find? a +@[inline] def find? (self : HashMap α β) (a : α) : Option β := self[a]? /-- Looks up an element in the map with key `a`. Returns `b₀` if the element is not found. @@ -364,7 +120,7 @@ hashMap.findD "one" 0 = 1 hashMap.findD "three" 0 = 0 ``` -/ -@[inline] def findD (self : HashMap α β) (a : α) (b₀ : β) : β := (self.find? a).getD b₀ +@[inline] def findD (self : HashMap α β) (a : α) (b₀ : β) : β := self.getD a b₀ /-- Looks up an element in the map with key `a`. Panics if the element is not found. @@ -375,10 +131,25 @@ hashMap.find! "three" => panic! ``` -/ @[inline] def find! [Inhabited β] (self : HashMap α β) (a : α) : β := - (self.find? a).getD (panic! "key is not in the map") + self.getD a (panic! "key is not in the map") instance : GetElem (HashMap α β) α (Option β) fun _ _ => True where - getElem m k _ := m.find? k + getElem m k _ := m[k]? + +/-- +Given a key `a`, returns a key-value pair in the map whose key compares equal to `a`. +Note that the returned key may not be identical to the input, if `==` ignores some part +of the value. +``` +def hashMap := ofList [("one", 1), ("two", 2)] +hashMap.findEntry? "one" = some ("one", 1) +hashMap.findEntry? "three" = none +``` +-/ +-- This could be given a more efficient low level implementation. +@[inline] +def findEntry? [BEq α] [Hashable α] (m : Std.HashMap α β) (k : α) : Option (α × β) := + if h : k ∈ m then some (m.getKey k h, m.get k h) else none /-- Returns true if the element `a` is in the map. @@ -388,7 +159,7 @@ hashMap.contains "one" = true hashMap.contains "three" = false ``` -/ -@[inline] def contains (self : HashMap α β) (a : α) : Bool := self.1.contains a +@[inline] def contains (self : HashMap α β) (a : α) : Bool := Std.HashMap.contains self a /-- Folds a monadic function over the elements in the map (in arbitrary order). @@ -402,7 +173,7 @@ foldM sumEven 0 (ofList [("two", 2), ("four", 4)]) = Except.ok 6 ``` -/ @[inline] def foldM [Monad m] (f : δ → α → β → m δ) (init : δ) (self : HashMap α β) : m δ := - self.1.foldM f init + Std.HashMap.foldM f init self /-- Folds a function over the elements in the map (in arbitrary order). @@ -410,8 +181,8 @@ Folds a function over the elements in the map (in arbitrary order). fold (fun sum _ v => sum + v) 0 (ofList [("one", 1), ("two", 2)]) = 3 ``` -/ -@[inline] def fold (f : δ → α → β → δ) (init : δ) (self : HashMap α β) : δ := self.1.fold f init - +@[inline] def fold (f : δ → α → β → δ) (init : δ) (self : HashMap α β) : δ := + Std.HashMap.fold f init self /-- Combines two hashmaps using a monadic function `f` to combine two values at a key. ``` @@ -429,7 +200,7 @@ mergeWithM mergeIfNoConflict? map1 map3 = none @[specialize] def mergeWithM [Monad m] (f : α → β → β → m β) (self other : HashMap α β) : m (HashMap α β) := other.foldM (init := self) fun m k v₂ => - match m.find? k with + match m[k]? with | none => return m.insert k v₂ | some v₁ => return m.insert k (← f k v₁ v₂) @@ -445,7 +216,7 @@ mergeWith (fun _ v₁ v₂ => v₁ + v₂ ) -- Implementing this function directly, rather than via `mergeWithM`, gives -- us less constrained universes. other.fold (init := self) fun map k v₂ => - match map.find? k with + match map[k]? with | none => map.insert k v₂ | some v₁ => map.insert k $ f k v₁ v₂ @@ -459,7 +230,8 @@ forM checkEven (ofList [("one", 1), ("three", 3)]) = Except.error "value 3 at ke forM checkEven (ofList [("two", 2), ("four", 4)]) = Except.ok () ``` -/ -@[inline] def forM [Monad m] (f : α → β → m PUnit) (self : HashMap α β) : m PUnit := self.1.forM f +@[inline] def forM [Monad m] (f : α → β → m PUnit) (self : HashMap α β) : m PUnit := + Std.HashMap.forM f self /-- Converts the map into a list of key-value pairs. @@ -481,7 +253,7 @@ def toArray (self : HashMap α β) : Array (α × β) := self.fold (init := #[]) fun r k v => r.push (k, v) /-- The number of buckets in the hash map. -/ -def numBuckets (self : HashMap α β) : Nat := self.1.buckets.1.size +def numBuckets (self : HashMap α β) : Nat := Std.HashMap.Internal.numBuckets self /-- Builds a `HashMap` from a list of key-value pairs. diff --git a/Batteries/Data/HashMap/Lemmas.lean b/Batteries/Data/HashMap/Lemmas.lean index 2e9df0770d..305077fa88 100644 --- a/Batteries/Data/HashMap/Lemmas.lean +++ b/Batteries/Data/HashMap/Lemmas.lean @@ -14,19 +14,5 @@ more lemmas. See the module `Std.Data.HashMap.Lemmas`. namespace Batteries.HashMap -namespace Imp - -@[simp] theorem empty_buckets : - (empty : Imp α β).buckets = ⟨mkArray 8 AssocList.nil, by simp⟩ := rfl - -@[simp] theorem empty_val [BEq α] [Hashable α] : (∅ : HashMap α β).val = Imp.empty := rfl - -end Imp - @[simp] theorem empty_find? [BEq α] [Hashable α] {a : α} : - (∅ : HashMap α β).find? a = none := by simp [find?, Imp.find?] - --- `Std.HashMap` has this lemma (as `getElem?_insert`) and many more, so working on this --- `proof_wanted` is likely not a good use of your time. --- proof_wanted insert_find? [BEq α] [Hashable α] (m : HashMap α β) (a a' : α) (b : β) : --- (m.insert a b).find? a' = if a' == a then some b else m.find? a' + (∅ : HashMap α β).find? a = none := Std.HashMap.getElem?_empty diff --git a/Batteries/Data/HashMap/WF.lean b/Batteries/Data/HashMap/WF.lean deleted file mode 100644 index aa33711633..0000000000 --- a/Batteries/Data/HashMap/WF.lean +++ /dev/null @@ -1,399 +0,0 @@ -/- -Copyright (c) 2022 Mario Carneiro. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Authors: Mario Carneiro --/ -import Batteries.Tactic.SeqFocus -import Batteries.Data.HashMap.Basic -import Batteries.Data.Nat.Lemmas -import Batteries.Data.List.Lemmas - -namespace Batteries.HashMap -namespace Imp - -attribute [-simp] Bool.not_eq_true - -namespace Buckets - -@[ext] protected theorem ext : ∀ {b₁ b₂ : Buckets α β}, b₁.1.toList = b₂.1.toList → b₁ = b₂ - | ⟨⟨_⟩, _⟩, ⟨⟨_⟩, _⟩, rfl => rfl - -theorem toList_update (self : Buckets α β) (i d h) : - (self.update i d h).1.toList = self.1.toList.set i.toNat d := rfl - -@[deprecated (since := "2024-09-09")] alias update_data := toList_update - -theorem exists_of_update (self : Buckets α β) (i d h) : - ∃ l₁ l₂, self.1.toList = l₁ ++ self.1[i] :: l₂ ∧ List.length l₁ = i.toNat ∧ - (self.update i d h).1.toList = l₁ ++ d :: l₂ := by - simp only [Array.length_toList, Array.ugetElem_eq_getElem, Array.getElem_eq_getElem_toList] - exact List.exists_of_set h - -theorem update_update (self : Buckets α β) (i d d' h h') : - (self.update i d h).update i d' h' = self.update i d' h := by - simp only [update, Array.uset, Array.length_toList] - congr 1 - rw [Array.set_set] - -theorem size_eq (data : Buckets α β) : - size data = (data.1.toList.map (·.toList.length)).sum := rfl - -theorem mk_size (h) : (mk n h : Buckets α β).size = 0 := by - simp only [mk, mkArray, size_eq]; clear h - induction n <;> simp_all [List.replicate_succ] - -theorem WF.mk' [BEq α] [Hashable α] (h) : (Buckets.mk n h : Buckets α β).WF := by - refine ⟨fun _ h => ?_, fun i h => ?_⟩ - · simp only [Buckets.mk, mkArray, List.mem_replicate, ne_eq] at h - simp [h, List.Pairwise.nil] - · simp [Buckets.mk, empty', mkArray, Array.getElem_eq_getElem_toList, AssocList.All] - -theorem WF.update [BEq α] [Hashable α] {buckets : Buckets α β} {i d h} (H : buckets.WF) - (h₁ : ∀ [PartialEquivBEq α] [LawfulHashable α], - (buckets.1[i].toList.Pairwise fun a b => ¬(a.1 == b.1)) → - d.toList.Pairwise fun a b => ¬(a.1 == b.1)) - (h₂ : (buckets.1[i].All fun k _ => ((hash k).toUSize % .ofNat buckets.1.size).toNat = i.toNat) → - d.All fun k _ => ((hash k).toUSize % USize.ofNat buckets.1.size).toNat = i.toNat) : - (buckets.update i d h).WF := by - refine ⟨fun l hl => ?_, fun i hi p hp => ?_⟩ - · exact match List.mem_or_eq_of_mem_set hl with - | .inl hl => H.1 _ hl - | .inr rfl => h₁ (H.1 _ (Array.getElem_mem_toList ..)) - · revert hp - simp only [Array.getElem_eq_getElem_toList, toList_update, List.getElem_set, - Array.length_toList, update_size] - split <;> intro hp - · next eq => exact eq ▸ h₂ (H.2 _ _) _ hp - · simp only [update_size, Array.length_toList] at hi - exact H.2 i hi _ hp - -end Buckets - -theorem reinsertAux_size [Hashable α] (data : Buckets α β) (a : α) (b : β) : - (reinsertAux data a b).size = data.size.succ := by - simp only [reinsertAux, Array.length_toList, Array.ugetElem_eq_getElem, Buckets.size_eq, - Nat.succ_eq_add_one] - refine have ⟨l₁, l₂, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp [h₁, Nat.succ_add]; rfl - -theorem reinsertAux_WF [BEq α] [Hashable α] {data : Buckets α β} {a : α} {b : β} (H : data.WF) - (h₁ : ∀ [PartialEquivBEq α] [LawfulHashable α], - haveI := mkIdx data.2 (hash a).toUSize - data.val[this.1].All fun x _ => ¬(a == x)) : - (reinsertAux data a b).WF := - H.update (.cons h₁) fun - | _, _, .head .. => rfl - | H, _, .tail _ h => H _ h - -theorem expand_size [Hashable α] {buckets : Buckets α β} : - (expand sz buckets).buckets.size = buckets.size := by - rw [expand, go] - · rw [Buckets.mk_size]; simp [Buckets.size] - · nofun -where - go (i source) (target : Buckets α β) (hs : ∀ j < i, source.toList[j]?.getD .nil = .nil) : - (expand.go i source target).size = - (source.toList.map (·.toList.length)).sum + target.size := by - unfold expand.go; split - · next H => - refine (go (i+1) _ _ fun j hj => ?a).trans ?b - · case a => - simp only [Array.length_toList, Array.toList_set] - simp [List.getD_eq_getElem?_getD, List.getElem?_set, Option.map_eq_map]; split - · cases source.toList[j]? <;> rfl - · next H => exact hs _ (Nat.lt_of_le_of_ne (Nat.le_of_lt_succ hj) (Ne.symm H)) - · case b => - simp only [Array.length_toList, Array.toList_set, Array.get_eq_getElem, AssocList.foldl_eq] - refine have ⟨l₁, l₂, h₁, _, eq⟩ := List.exists_of_set H; eq ▸ ?_ - rw [h₁] - simp only [Buckets.size_eq, List.map_append, List.map_cons, AssocList.toList, - List.length_nil, Nat.sum_append, List.sum_cons, Nat.zero_add, Array.length_toList] - rw [Nat.add_assoc, Nat.add_assoc, Nat.add_assoc]; congr 1 - (conv => rhs; rw [Nat.add_left_comm]); congr 1 - rw [← Array.getElem_eq_getElem_toList] - have := @reinsertAux_size α β _; simp [Buckets.size] at this - induction source[i].toList generalizing target <;> simp [*, Nat.succ_add]; rfl - · next H => - rw [(_ : List.sum _ = 0), Nat.zero_add] - rw [← (_ : source.toList.map (fun _ => .nil) = source.toList)] - · simp only [List.map_map] - induction source.toList <;> simp [*] - refine List.ext_getElem (by simp) fun j h₁ h₂ => ?_ - simp only [List.getElem_map, Array.length_toList] - have := (hs j (Nat.lt_of_lt_of_le h₂ (Nat.not_lt.1 H))).symm - rwa [List.getElem?_eq_getElem] at this - termination_by source.size - i - -theorem expand_WF.foldl [BEq α] [Hashable α] (rank : α → Nat) {l : List (α × β)} {i : Nat} - (hl₁ : ∀ [PartialEquivBEq α] [LawfulHashable α], l.Pairwise fun a b => ¬(a.1 == b.1)) - (hl₂ : ∀ x ∈ l, rank x.1 = i) - {target : Buckets α β} (ht₁ : target.WF) - (ht₂ : ∀ bucket ∈ target.1.toList, - bucket.All fun k _ => rank k ≤ i ∧ - ∀ [PartialEquivBEq α] [LawfulHashable α], ∀ x ∈ l, ¬(x.1 == k)) : - (l.foldl (fun d x => reinsertAux d x.1 x.2) target).WF ∧ - ∀ bucket ∈ (l.foldl (fun d x => reinsertAux d x.1 x.2) target).1.toList, - bucket.All fun k _ => rank k ≤ i := by - induction l generalizing target with - | nil => exact ⟨ht₁, fun _ h₁ _ h₂ => (ht₂ _ h₁ _ h₂).1⟩ - | cons _ _ ih => - simp only [List.pairwise_cons, List.mem_cons, forall_eq_or_imp] at hl₁ hl₂ ht₂ - refine ih hl₁.2 hl₂.2 - (reinsertAux_WF ht₁ fun _ h => (ht₂ _ (Array.getElem_mem_toList ..) _ h).2.1) - (fun _ h => ?_) - simp only [reinsertAux, Buckets.update, Array.uset, Array.length_toList, - Array.ugetElem_eq_getElem, Array.toList_set] at h - match List.mem_or_eq_of_mem_set h with - | .inl h => - intro _ hf - have ⟨h₁, h₂⟩ := ht₂ _ h _ hf - exact ⟨h₁, h₂.2⟩ - | .inr h => subst h; intro - | _, .head .. => - exact ⟨hl₂.1 ▸ Nat.le_refl _, fun _ h h' => hl₁.1 _ h (PartialEquivBEq.symm h')⟩ - | _, .tail _ h => - have ⟨h₁, h₂⟩ := ht₂ _ (Array.getElem_mem_toList ..) _ h - exact ⟨h₁, h₂.2⟩ - -theorem expand_WF [BEq α] [Hashable α] {buckets : Buckets α β} (H : buckets.WF) : - (expand sz buckets).buckets.WF := - go _ H.1 H.2 ⟨.mk' _, fun _ _ _ _ => by simp_all [Buckets.mk, List.mem_replicate]⟩ -where - go (i) {source : Array (AssocList α β)} - (hs₁ : ∀ [LawfulHashable α] [PartialEquivBEq α], ∀ bucket ∈ source.toList, - bucket.toList.Pairwise fun a b => ¬(a.1 == b.1)) - (hs₂ : ∀ (j : Nat) (h : j < source.size), - source[j].All fun k _ => ((hash k).toUSize % USize.ofNat source.size).toNat = j) - {target : Buckets α β} (ht : target.WF ∧ ∀ bucket ∈ target.1.toList, - bucket.All fun k _ => ((hash k).toUSize % USize.ofNat source.size).toNat < i) : - (expand.go i source target).WF := by - unfold expand.go; split - · next H => - refine go (i+1) (fun _ hl => ?_) (fun i h => ?_) ?_ - · match List.mem_or_eq_of_mem_set hl with - | .inl hl => exact hs₁ _ hl - | .inr e => exact e ▸ .nil - · simp only [Array.length_toList, Array.size_set, Array.getElem_eq_getElem_toList, - Array.toList_set, List.getElem_set] - split - · nofun - · exact hs₂ _ (by simp_all) - · let rank (k : α) := ((hash k).toUSize % USize.ofNat source.size).toNat - have := expand_WF.foldl rank ?_ (hs₂ _ H) ht.1 (fun _ h₁ _ h₂ => ?_) - · simp only [Array.get_eq_getElem, AssocList.foldl_eq, Array.size_set] - exact ⟨this.1, fun _ h₁ _ h₂ => Nat.lt_succ_of_le (this.2 _ h₁ _ h₂)⟩ - · exact hs₁ _ (Array.getElem_mem_toList ..) - · have := ht.2 _ h₁ _ h₂ - refine ⟨Nat.le_of_lt this, fun _ h h' => Nat.ne_of_lt this ?_⟩ - exact LawfulHashable.hash_eq h' ▸ hs₂ _ H _ h - · exact ht.1 - termination_by source.size - i - -theorem insert_size [BEq α] [Hashable α] {m : Imp α β} {k v} - (h : m.size = m.buckets.size) : - (insert m k v).size = (insert m k v).buckets.size := by - dsimp [insert, cond]; split - · unfold Buckets.size - refine have ⟨_, _, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp [h, h₁, Buckets.size_eq] - split - · unfold Buckets.size - refine have ⟨_, _, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp [h, h₁, Buckets.size_eq, Nat.succ_add]; rfl - · rw [expand_size]; simp only [expand, h, Buckets.size, Array.length_toList, Buckets.update_size] - refine have ⟨_, _, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp [h₁, Buckets.size_eq, Nat.succ_add]; rfl - -private theorem mem_replaceF {l : List (α × β)} {x : α × β} {p : α × β → Bool} {f : α × β → β} : - x ∈ (l.replaceF fun a => bif p a then some (k, f a) else none) → x.1 = k ∨ x ∈ l := by - induction l with - | nil => exact .inr - | cons a l ih => - simp only [List.replaceF, List.mem_cons] - generalize e : cond .. = z; revert e - unfold cond; split <;> (intro h; subst h; simp) - · intro - | .inl eq => exact eq ▸ .inl rfl - | .inr h => exact .inr (.inr h) - · intro - | .inl eq => exact .inr (.inl eq) - | .inr h => exact (ih h).imp_right .inr - -private theorem pairwise_replaceF [BEq α] [PartialEquivBEq α] - {l : List (α × β)} {f : α × β → β} - (H : l.Pairwise fun a b => ¬(a.fst == b.fst)) : - (l.replaceF fun a => bif a.fst == k then some (k, f a) else none) - |>.Pairwise fun a b => ¬(a.fst == b.fst) := by - induction l with - | nil => simp [H] - | cons a l ih => - simp only [List.pairwise_cons, List.replaceF] at H ⊢ - generalize e : cond .. = z; unfold cond at e; revert e - split <;> (intro h; subst h; simp only [List.pairwise_cons]) - · next e => exact ⟨(H.1 · · ∘ PartialEquivBEq.trans e), H.2⟩ - · next e => - refine ⟨fun a h => ?_, ih H.2⟩ - match mem_replaceF h with - | .inl eq => exact eq ▸ ne_true_of_eq_false e - | .inr h => exact H.1 a h - -theorem insert_WF [BEq α] [Hashable α] {m : Imp α β} {k v} - (h : m.buckets.WF) : (insert m k v).buckets.WF := by - dsimp [insert, cond]; split - · next h₁ => - simp only [AssocList.contains_eq, List.any_eq_true] at h₁; have ⟨x, hx₁, hx₂⟩ := h₁ - refine h.update (fun H => ?_) (fun H a h => ?_) - · simp only [AssocList.toList_replace] - exact pairwise_replaceF H - · simp only [AssocList.All, Array.ugetElem_eq_getElem, AssocList.toList_replace] at H h ⊢ - match mem_replaceF h with - | .inl rfl => rfl - | .inr h => exact H _ h - · next h₁ => - rw [Bool.eq_false_iff] at h₁ - simp only [AssocList.contains_eq, ne_eq, List.any_eq_true, not_exists, not_and] at h₁ - suffices _ by split <;> [exact this; refine expand_WF this] - refine h.update (.cons ?_) (fun H a h => ?_) - · exact fun a h h' => h₁ a h (PartialEquivBEq.symm h') - · cases h with - | head => rfl - | tail _ h => exact H _ h - -theorem erase_size [BEq α] [Hashable α] {m : Imp α β} {k} - (h : m.size = m.buckets.size) : - (erase m k).size = (erase m k).buckets.size := by - dsimp [erase, cond]; split - · next H => - simp only [h, Buckets.size] - refine have ⟨_, _, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp only [h₁, Array.length_toList, Array.ugetElem_eq_getElem, List.map_append, List.map_cons, - Nat.sum_append, List.sum_cons, AssocList.toList_erase] - rw [(_ : List.length _ = _ + 1), Nat.add_right_comm]; {rfl} - clear h₁ eq - simp only [AssocList.contains_eq, List.any_eq_true] at H - have ⟨a, h₁, h₂⟩ := H - refine have ⟨_, _, _, _, _, h, eq⟩ := List.exists_of_eraseP h₁ h₂; eq ▸ ?_ - simp [h]; rfl - · exact h - -theorem erase_WF [BEq α] [Hashable α] {m : Imp α β} {k} - (h : m.buckets.WF) : (erase m k).buckets.WF := by - dsimp [erase, cond]; split - · refine h.update (fun H => ?_) (fun H a h => ?_) <;> simp only [AssocList.toList_erase] at h ⊢ - · exact H.sublist (List.eraseP_sublist _) - · exact H _ (List.mem_of_mem_eraseP h) - · exact h - -theorem modify_size [BEq α] [Hashable α] {m : Imp α β} {k} - (h : m.size = m.buckets.size) : - (modify m k f).size = (modify m k f).buckets.size := by - dsimp [modify, cond]; rw [Buckets.update_update] - simp only [h, Buckets.size] - refine have ⟨_, _, h₁, _, eq⟩ := Buckets.exists_of_update ..; eq ▸ ?_ - simp [h, h₁, Buckets.size_eq] - -theorem modify_WF [BEq α] [Hashable α] {m : Imp α β} {k} - (h : m.buckets.WF) : (modify m k f).buckets.WF := by - dsimp [modify, cond]; rw [Buckets.update_update] - refine h.update (fun H => ?_) (fun H a h => ?_) <;> simp at h ⊢ - · exact pairwise_replaceF H - · simp only [AssocList.All, Array.ugetElem_eq_getElem] at H h ⊢ - match mem_replaceF h with - | .inl rfl => rfl - | .inr h => exact H _ h - -theorem WF.out [BEq α] [Hashable α] {m : Imp α β} (h : m.WF) : - m.size = m.buckets.size ∧ m.buckets.WF := by - induction h with - | mk h₁ h₂ => exact ⟨h₁, h₂⟩ - | @empty' _ h => exact ⟨(Buckets.mk_size h).symm, .mk' h⟩ - | insert _ ih => exact ⟨insert_size ih.1, insert_WF ih.2⟩ - | erase _ ih => exact ⟨erase_size ih.1, erase_WF ih.2⟩ - | modify _ ih => exact ⟨modify_size ih.1, modify_WF ih.2⟩ - -theorem WF_iff [BEq α] [Hashable α] {m : Imp α β} : - m.WF ↔ m.size = m.buckets.size ∧ m.buckets.WF := - ⟨(·.out), fun ⟨h₁, h₂⟩ => .mk h₁ h₂⟩ - -theorem WF.mapVal {α β γ} {f : α → β → γ} [BEq α] [Hashable α] - {m : Imp α β} (H : WF m) : WF (mapVal f m) := by - have ⟨h₁, h₂⟩ := H.out - simp only [Imp.mapVal, h₁, Buckets.mapVal, WF_iff]; refine ⟨?_, ?_, fun i h => ?_⟩ - · simp only [Buckets.size, Array.toList_map, List.map_map]; congr; funext l; simp - · simp only [Array.toList_map, List.forall_mem_map] - simp only [AssocList.toList_mapVal, List.pairwise_map] - exact fun _ => h₂.1 _ - · simp only [Array.size_map, AssocList.All, Array.getElem_map, AssocList.toList_mapVal, - List.mem_map, forall_exists_index, and_imp, forall_apply_eq_imp_iff₂] at h ⊢ - intro a m - apply h₂.2 _ _ _ m - -theorem WF.filterMap {α β γ} {f : α → β → Option γ} [BEq α] [Hashable α] - {m : Imp α β} (H : WF m) : WF (filterMap f m) := by - let g₁ (l : AssocList α β) := l.toList.filterMap (fun x => (f x.1 x.2).map (x.1, ·)) - have H1 (l n acc) : filterMap.go f acc l n = - (((g₁ l).reverse ++ acc.toList).toAssocList, ⟨n.1 + (g₁ l).length⟩) := by - induction l generalizing n acc with simp only [filterMap.go, AssocList.toList, - List.filterMap_nil, List.reverse_nil, List.nil_append, AssocList.toList_toAssocList, - List.length_nil, Nat.add_zero, List.filterMap_cons, g₁, *] - | cons a b l => match f a b with - | none => rfl - | some c => - simp only [Option.map_some', List.reverse_cons, List.append_assoc, List.singleton_append, - List.length_cons, Nat.succ_eq_add_one, Prod.mk.injEq, true_and] - rw [Nat.add_right_comm] - rfl - let g l := (g₁ l).reverse.toAssocList - let M := StateT (ULift Nat) Id - have H2 (l : List (AssocList α β)) n : - l.mapM (m := M) (filterMap.go f .nil) n = - (l.map g, ⟨n.1 + ((l.map g).map (·.toList.length)).sum⟩) := by - induction l generalizing n with - | nil => rfl - | cons l L IH => simp [bind, StateT.bind, IH, H1, Nat.add_assoc, g]; rfl - have H3 (l : List _) : - (l.filterMap (fun (a, b) => (f a b).map (a, ·))).map (fun a => a.fst) - |>.Sublist (l.map (·.1)) := by - induction l with - | nil => exact .slnil - | cons a l ih => - simp only [List.filterMap_cons, List.map_cons]; exact match f a.1 a.2 with - | none => .cons _ ih - | some b => .cons₂ _ ih - suffices ∀ bk sz (h : 0 < bk.length), - m.buckets.val.mapM (m := M) (filterMap.go f .nil) ⟨0⟩ = (⟨bk⟩, ⟨sz⟩) → - WF ⟨sz, ⟨bk⟩, h⟩ from this _ _ _ rfl - simp only [Array.mapM_eq_mapM_toList, Functor.map, StateT.map, H2, List.map_map, Nat.zero_add, g] - intro bk sz h e'; cases e' - refine .mk (by simp [Buckets.size]) ⟨?_, fun i h => ?_⟩ - · simp only [List.forall_mem_map, List.toList_toAssocList] - refine fun l h => (List.pairwise_reverse.2 ?_).imp (mt PartialEquivBEq.symm) - have := H.out.2.1 _ h - rw [← List.pairwise_map (R := (¬ · == ·))] at this ⊢ - exact this.sublist (H3 l.toList) - · simp only [Array.size_mk, List.length_map, Array.length_toList, Array.getElem_eq_getElem_toList, - List.getElem_map] at h ⊢ - have := H.out.2.2 _ h - simp only [AssocList.All, List.toList_toAssocList, List.mem_reverse, List.mem_filterMap, - Option.map_eq_some', forall_exists_index, and_imp, g₁] at this ⊢ - rintro _ _ h' _ _ rfl - exact this _ h' - -end Imp - -variable {_ : BEq α} {_ : Hashable α} - -/-- Map a function over the values in the map. -/ -@[inline] def mapVal (f : α → β → γ) (self : HashMap α β) : HashMap α γ := - ⟨self.1.mapVal f, self.2.mapVal⟩ - -/-- -Applies `f` to each key-value pair `a, b` in the map. If it returns `some c` then -`a, c` is pushed into the new map; else the key is removed from the map. --/ -@[inline] def filterMap (f : α → β → Option γ) (self : HashMap α β) : HashMap α γ := - ⟨self.1.filterMap f, self.2.filterMap⟩ - -/-- Constructs a map with the set of all pairs `a, b` such that `f` returns true. -/ -@[inline] def filter (f : α → β → Bool) (self : HashMap α β) : HashMap α β := - self.filterMap fun a b => bif f a b then some b else none