From 0209162b459ebae5ef72f4b4e54c7b009dda89a8 Mon Sep 17 00:00:00 2001 From: Soumik Sarkar Date: Sat, 17 Aug 2024 19:45:03 +0530 Subject: [PATCH] Add lookupMin and lookupMax for IntSet (#976) These already exist for Set, Map, and IntMap. Additionally, * Implement lookupMin, lookupMax, findMin, findMax in a consistent manner for all structures. The rationale for this implementation is provided in the Note [Inline lookupMin] in Data.Set.Internal. * Update docs for lookupMin, lookupMax, findMin, findMax to be consistent across all structures. --- containers-tests/tests/intset-properties.hs | 9 +++ containers/changelog.md | 4 ++ containers/src/Data/IntMap/Internal.hs | 54 ++++++++++++------ containers/src/Data/IntSet.hs | 2 + containers/src/Data/IntSet/Internal.hs | 62 ++++++++++++++------- containers/src/Data/Map/Internal.hs | 32 +++++++++-- containers/src/Data/Set/Internal.hs | 23 +++++--- 7 files changed, 135 insertions(+), 51 deletions(-) diff --git a/containers-tests/tests/intset-properties.hs b/containers-tests/tests/intset-properties.hs index 69564adcc..d43c25fa4 100644 --- a/containers-tests/tests/intset-properties.hs +++ b/containers-tests/tests/intset-properties.hs @@ -5,6 +5,7 @@ import Data.Word (Word) import Data.IntSet import Data.List (nub,sort) import qualified Data.List as List +import Data.Maybe (listToMaybe) import Data.Monoid (mempty) import qualified Data.Set as Set import IntSetValidity (valid) @@ -55,6 +56,8 @@ main = defaultMain $ testGroup "intset-properties" , testProperty "prop_isSubsetOf2" prop_isSubsetOf2 , testProperty "prop_disjoint" prop_disjoint , testProperty "prop_size" prop_size + , testProperty "prop_lookupMin" prop_lookupMin + , testProperty "prop_lookupMax" prop_lookupMax , testProperty "prop_findMax" prop_findMax , testProperty "prop_findMin" prop_findMin , testProperty "prop_ord" prop_ord @@ -342,6 +345,12 @@ prop_size s = sz === foldl' (\i _ -> i + 1) (0 :: Int) s .&&. sz === List.length (toList s) where sz = size s +prop_lookupMin :: IntSet -> Property +prop_lookupMin s = lookupMin s === listToMaybe (toAscList s) + +prop_lookupMax :: IntSet -> Property +prop_lookupMax s = lookupMax s === listToMaybe (toDescList s) + prop_findMax :: IntSet -> Property prop_findMax s = not (null s) ==> findMax s == maximum (toList s) diff --git a/containers/changelog.md b/containers/changelog.md index b93571273..c0e00aa83 100644 --- a/containers/changelog.md +++ b/containers/changelog.md @@ -35,6 +35,10 @@ provided `only2` function with empty maps, contrary to documentation. (Soumik Sarkar) +### Additions + +* Add `lookupMin` and `lookupMax` for `Data.IntSet`. (Soumik Sarkar) + ## Unreleased with `@since` annotation for 0.7.1: ### Additions diff --git a/containers/src/Data/IntMap/Internal.hs b/containers/src/Data/IntMap/Internal.hs index ed9ce5e34..65ff4eb86 100644 --- a/containers/src/Data/IntMap/Internal.hs +++ b/containers/src/Data/IntMap/Internal.hs @@ -2301,37 +2301,55 @@ deleteFindMax = fromMaybe (error "deleteFindMax: empty map has no maximal elemen deleteFindMin :: IntMap a -> ((Key, a), IntMap a) deleteFindMin = fromMaybe (error "deleteFindMin: empty map has no minimal element") . minViewWithKey +-- The KeyValue type is used when returning a key-value pair and helps with +-- GHC optimizations. +-- +-- For lookupMinSure, if the return type is (Int, a), GHC compiles it to a +-- worker $wlookupMinSure :: IntMap a -> (# Int, a #). If the return type is +-- KeyValue a instead, the worker does not box the int and returns +-- (# Int#, a #). +-- For a modern enough GHC (>=9.4), this measure turns out to be unnecessary in +-- this instance. We still use it for older GHCs and to make our intent clear. + +data KeyValue a = KeyValue {-# UNPACK #-} !Key a + +kvToTuple :: KeyValue a -> (Key, a) +kvToTuple (KeyValue k x) = (k, x) +{-# INLINE kvToTuple #-} + +lookupMinSure :: IntMap a -> KeyValue a +lookupMinSure (Tip k v) = KeyValue k v +lookupMinSure (Bin _ l _) = lookupMinSure l +lookupMinSure Nil = error "lookupMinSure Nil" + -- | \(O(\min(n,W))\). The minimal key of the map. Returns 'Nothing' if the map is empty. lookupMin :: IntMap a -> Maybe (Key, a) -lookupMin Nil = Nothing -lookupMin (Tip k v) = Just (k,v) -lookupMin (Bin p l r) - | signBranch p = go r - | otherwise = go l - where go (Tip k v) = Just (k,v) - go (Bin _ l' _) = go l' - go Nil = Nothing +lookupMin Nil = Nothing +lookupMin (Tip k v) = Just (k,v) +lookupMin (Bin p l r) = + Just $! kvToTuple (lookupMinSure (if signBranch p then r else l)) +{-# INLINE lookupMin #-} -- See Note [Inline lookupMin] in Data.Set.Internal -- | \(O(\min(n,W))\). The minimal key of the map. Calls 'error' if the map is empty. --- Use 'minViewWithKey' if the map may be empty. findMin :: IntMap a -> (Key, a) findMin t | Just r <- lookupMin t = r | otherwise = error "findMin: empty map has no minimal element" +lookupMaxSure :: IntMap a -> KeyValue a +lookupMaxSure (Tip k v) = KeyValue k v +lookupMaxSure (Bin _ _ r) = lookupMaxSure r +lookupMaxSure Nil = error "lookupMaxSure Nil" + -- | \(O(\min(n,W))\). The maximal key of the map. Returns 'Nothing' if the map is empty. lookupMax :: IntMap a -> Maybe (Key, a) -lookupMax Nil = Nothing -lookupMax (Tip k v) = Just (k,v) -lookupMax (Bin p l r) - | signBranch p = go l - | otherwise = go r - where go (Tip k v) = Just (k,v) - go (Bin _ _ r') = go r' - go Nil = Nothing +lookupMax Nil = Nothing +lookupMax (Tip k v) = Just (k,v) +lookupMax (Bin p l r) = + Just $! kvToTuple (lookupMaxSure (if signBranch p then l else r)) +{-# INLINE lookupMax #-} -- See Note [Inline lookupMin] in Data.Set.Internal -- | \(O(\min(n,W))\). The maximal key of the map. Calls 'error' if the map is empty. --- Use 'maxViewWithKey' if the map may be empty. findMax :: IntMap a -> (Key, a) findMax t | Just r <- lookupMax t = r diff --git a/containers/src/Data/IntSet.hs b/containers/src/Data/IntSet.hs index 123cdfb07..5c53c2b35 100644 --- a/containers/src/Data/IntSet.hs +++ b/containers/src/Data/IntSet.hs @@ -136,6 +136,8 @@ module Data.IntSet ( , fold -- * Min\/Max + , lookupMin + , lookupMax , findMin , findMax , deleteMin diff --git a/containers/src/Data/IntSet/Internal.hs b/containers/src/Data/IntSet/Internal.hs index 6d739b8de..74b63f457 100644 --- a/containers/src/Data/IntSet/Internal.hs +++ b/containers/src/Data/IntSet/Internal.hs @@ -153,6 +153,8 @@ module Data.IntSet.Internal ( , fold -- * Min\/Max + , lookupMin + , lookupMax , findMin , findMax , deleteMin @@ -1044,29 +1046,49 @@ deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal elemen deleteFindMax :: IntSet -> (Key, IntSet) deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView +lookupMinSure :: IntSet -> Key +lookupMinSure (Tip kx bm) = kx + lowestBitSet bm +lookupMinSure (Bin _ l _) = lookupMinSure l +lookupMinSure Nil = error "lookupMin Nil" --- | \(O(\min(n,W))\). The minimal element of the set. +-- | \(O(\min(n,W))\). The minimal element of the set. Returns 'Nothing' if the +-- set is empty. +-- +-- @since FIXME +lookupMin :: IntSet -> Maybe Key +lookupMin Nil = Nothing +lookupMin (Tip kx bm) = Just $! kx + lowestBitSet bm +lookupMin (Bin p l r) = Just $! lookupMinSure (if signBranch p then r else l) +{-# INLINE lookupMin #-} -- See Note [Inline lookupMin] in Data.Set.Internal + +-- | \(O(\min(n,W))\). The minimal element of the set. Calls 'error' if the set +-- is empty. findMin :: IntSet -> Key -findMin Nil = error "findMin: empty set has no minimal element" -findMin (Tip kx bm) = kx + lowestBitSet bm -findMin (Bin p l r) - | signBranch p = find r - | otherwise = find l - where find (Tip kx bm) = kx + lowestBitSet bm - find (Bin _ l' _) = find l' - find Nil = error "findMin Nil" - --- | \(O(\min(n,W))\). The maximal element of a set. -findMax :: IntSet -> Key -findMax Nil = error "findMax: empty set has no maximal element" -findMax (Tip kx bm) = kx + highestBitSet bm -findMax (Bin p l r) - | signBranch p = find l - | otherwise = find r - where find (Tip kx bm) = kx + highestBitSet bm - find (Bin _ _ r') = find r' - find Nil = error "findMax Nil" +findMin t + | Just r <- lookupMin t = r + | otherwise = error "findMin: empty set has no minimal element" + +lookupMaxSure :: IntSet -> Key +lookupMaxSure (Tip kx bm) = kx + highestBitSet bm +lookupMaxSure (Bin _ _ r) = lookupMaxSure r +lookupMaxSure Nil = error "lookupMax Nil" +-- | \(O(\min(n,W))\). The maximal element of the set. Returns 'Nothing' if the +-- set is empty. +-- +-- @since FIXME +lookupMax :: IntSet -> Maybe Key +lookupMax Nil = Nothing +lookupMax (Tip kx bm) = Just $! kx + highestBitSet bm +lookupMax (Bin p l r) = Just $! lookupMaxSure (if signBranch p then l else r) +{-# INLINE lookupMax #-} -- See Note [Inline lookupMin] in Data.Set.Internal + +-- | \(O(\min(n,W))\). The maximal element of the set. Calls 'error' if the set +-- is empty. +findMax :: IntSet -> Key +findMax t + | Just r <- lookupMax t = r + | otherwise = error "findMax: empty set has no maximal element" -- | \(O(\min(n,W))\). Delete the minimal element. Returns an empty set if the set is empty. -- diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index 305533695..9263fff2f 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -1627,8 +1627,26 @@ deleteAt !i t = Minimal, Maximal --------------------------------------------------------------------} -lookupMinSure :: k -> a -> Map k a -> (k, a) -lookupMinSure k a Tip = (k, a) +-- The KeyValue type is used when returning a key-value pair and helps GHC keep +-- track of the fact that key is in WHNF. +-- +-- As an example, for a use case like +-- +-- fmap (\(k,_) -> ) (lookupMin m) +-- +-- on a non-empty map, GHC can decide to evaluate the usage of k if it is cheap +-- and put the result in the Just, instead of making a thunk for it. +-- If GHC does not know that k is in WHNF, it could be bottom, and so GHC must +-- always return Just with a thunk inside. + +data KeyValue k a = KeyValue !k a + +kvToTuple :: KeyValue k a -> (k, a) +kvToTuple (KeyValue k a) = (k, a) +{-# INLINE kvToTuple #-} + +lookupMinSure :: k -> a -> Map k a -> KeyValue k a +lookupMinSure !k a Tip = KeyValue k a lookupMinSure _ _ (Bin _ k a l _) = lookupMinSure k a l -- | \(O(\log n)\). The minimal key of the map. Returns 'Nothing' if the map is empty. @@ -1640,7 +1658,8 @@ lookupMinSure _ _ (Bin _ k a l _) = lookupMinSure k a l lookupMin :: Map k a -> Maybe (k,a) lookupMin Tip = Nothing -lookupMin (Bin _ k x l _) = Just $! lookupMinSure k x l +lookupMin (Bin _ k x l _) = Just $! kvToTuple (lookupMinSure k x l) +{-# INLINE lookupMin #-} -- See Note [Inline lookupMin] in Data.Set.Internal -- | \(O(\log n)\). The minimal key of the map. Calls 'error' if the map is empty. -- @@ -1652,8 +1671,8 @@ findMin t | Just r <- lookupMin t = r | otherwise = error "Map.findMin: empty map has no minimal element" -lookupMaxSure :: k -> a -> Map k a -> (k, a) -lookupMaxSure k a Tip = (k, a) +lookupMaxSure :: k -> a -> Map k a -> KeyValue k a +lookupMaxSure !k a Tip = KeyValue k a lookupMaxSure _ _ (Bin _ k a _ r) = lookupMaxSure k a r -- | \(O(\log n)\). The maximal key of the map. Returns 'Nothing' if the map is empty. @@ -1665,7 +1684,8 @@ lookupMaxSure _ _ (Bin _ k a _ r) = lookupMaxSure k a r lookupMax :: Map k a -> Maybe (k, a) lookupMax Tip = Nothing -lookupMax (Bin _ k x _ r) = Just $! lookupMaxSure k x r +lookupMax (Bin _ k x _ r) = Just $! kvToTuple (lookupMaxSure k x r) +{-# INLINE lookupMax #-} -- See Note [Inline lookupMin] in Data.Set.Internal -- | \(O(\log n)\). The maximal key of the map. Calls 'error' if the map is empty. -- diff --git a/containers/src/Data/Set/Internal.hs b/containers/src/Data/Set/Internal.hs index 6db430733..f1ec29c3a 100644 --- a/containers/src/Data/Set/Internal.hs +++ b/containers/src/Data/Set/Internal.hs @@ -754,23 +754,29 @@ disjoint (Bin _ x l r) t Minimal, Maximal --------------------------------------------------------------------} --- We perform call-pattern specialization manually on lookupMin --- and lookupMax. Otherwise, GHC doesn't seem to do it, which is --- unfortunate if, for example, someone uses findMin or findMax. +-- Note [Inline lookupMin] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- The core of lookupMin is implemented as lookupMinSure, a recursive function +-- that does not involve Maybes. lookupMin wraps the result of lookupMinSure in +-- a Just. We inline lookupMin so that GHC optimizations can eliminate the Maybe +-- if it is matched on at the call site. lookupMinSure :: a -> Set a -> a lookupMinSure x Tip = x lookupMinSure _ (Bin _ x l _) = lookupMinSure x l --- | \(O(\log n)\). The minimal element of a set. +-- | \(O(\log n)\). The minimal element of the set. Returns 'Nothing' if the set +-- is empty. -- -- @since 0.5.9 lookupMin :: Set a -> Maybe a lookupMin Tip = Nothing lookupMin (Bin _ x l _) = Just $! lookupMinSure x l +{-# INLINE lookupMin #-} -- See Note [Inline lookupMin] --- | \(O(\log n)\). The minimal element of a set. +-- | \(O(\log n)\). The minimal element of the set. Calls 'error' if the set is +-- empty. findMin :: Set a -> a findMin t | Just r <- lookupMin t = r @@ -780,15 +786,18 @@ lookupMaxSure :: a -> Set a -> a lookupMaxSure x Tip = x lookupMaxSure _ (Bin _ x _ r) = lookupMaxSure x r --- | \(O(\log n)\). The maximal element of a set. +-- | \(O(\log n)\). The maximal element of the set. Returns 'Nothing' if the set +-- is empty. -- -- @since 0.5.9 lookupMax :: Set a -> Maybe a lookupMax Tip = Nothing lookupMax (Bin _ x _ r) = Just $! lookupMaxSure x r +{-# INLINE lookupMax #-} -- See Note [Inline lookupMin] --- | \(O(\log n)\). The maximal element of a set. +-- | \(O(\log n)\). The maximal element of the set. Calls 'error' if the set is +-- empty. findMax :: Set a -> a findMax t | Just r <- lookupMax t = r