From 8c47ec9e1fad1ea5475554fdd4b1efcd3a78f1d0 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Tue, 22 Oct 2024 23:10:45 +0530 Subject: [PATCH 1/2] Add random insert and delete benchmarks --- containers-tests/benchmarks/Map.hs | 12 +++++++++++- containers-tests/benchmarks/Set.hs | 8 +++++++- containers-tests/benchmarks/Utils/Random.hs | 20 ++++++++++++++++++++ containers-tests/containers-tests.cabal | 14 ++++++++++++-- 4 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 containers-tests/benchmarks/Utils/Random.hs diff --git a/containers-tests/benchmarks/Map.hs b/containers-tests/benchmarks/Map.hs index 67944277f..95faf7069 100644 --- a/containers-tests/benchmarks/Map.hs +++ b/containers-tests/benchmarks/Map.hs @@ -15,13 +15,16 @@ import Data.Maybe (fromMaybe) import Data.Functor ((<$)) import Data.Coerce import Prelude hiding (lookup) +import System.Random (mkStdGen) +import Utils.Random (shuffle) main = do let m = M.fromAscList elems :: M.Map Int Int m_even = M.fromAscList elems_even :: M.Map Int Int m_odd = M.fromAscList elems_odd :: M.Map Int Int evaluate $ rnf [m, m_even, m_odd] - evaluate $ rnf [elems_rev, elems_asc, elems_desc] + evaluate $ rnf [elems_rev, elems_asc, elems_desc, elems_random] + evaluate $ rnf keys_random defaultMain [ bench "lookup absent" $ whnf (lookup evens) m_odd , bench "lookup present" $ whnf (lookup evens) m_even @@ -33,12 +36,16 @@ main = do , bench "alterF lookup present" $ whnf (atLookup evens) m_even , bench "alterF no rules lookup absent" $ whnf (atLookupNoRules evens) m_odd , bench "alterF no rules lookup present" $ whnf (atLookupNoRules evens) m_even + , bench "insert" $ whnf (ins elems) M.empty + , bench "insert random" $ whnf (ins elems_random) M.empty , bench "insert absent" $ whnf (ins elems_even) m_odd , bench "insert present" $ whnf (ins elems_even) m_even , bench "alterF insert absent" $ whnf (atIns elems_even) m_odd , bench "alterF insert present" $ whnf (atIns elems_even) m_even , bench "alterF no rules insert absent" $ whnf (atInsNoRules elems_even) m_odd , bench "alterF no rules insert present" $ whnf (atInsNoRules elems_even) m_even + , bench "delete" $ whnf (del keys) m + , bench "delete random" $ whnf (del keys_random) m , bench "delete absent" $ whnf (del evens) m_odd , bench "delete present" $ whnf (del evens) m , bench "alterF delete absent" $ whnf (atDel evens) m_odd @@ -117,6 +124,9 @@ main = do evens = [2,4..bound] odds = [1,3..bound] values = [1..bound] + stdGen = mkStdGen 42 + elems_random = shuffle stdGen elems + keys_random = map fst elems_random sumkv k v1 v2 = k + v1 + v2 consPair k v xs = (k, v) : xs diff --git a/containers-tests/benchmarks/Set.hs b/containers-tests/benchmarks/Set.hs index cf0e3b973..2dfd853c3 100644 --- a/containers-tests/benchmarks/Set.hs +++ b/containers-tests/benchmarks/Set.hs @@ -7,6 +7,8 @@ import Control.Exception (evaluate) import Test.Tasty.Bench (bench, defaultMain, whnf) import Data.List (foldl') import qualified Data.Set as S +import System.Random (mkStdGen) +import Utils.Random (shuffle) main = do let s = S.fromAscList elems :: S.Set Int @@ -14,14 +16,16 @@ main = do s_odd = S.fromAscList elems_odd :: S.Set Int strings_s = S.fromList strings evaluate $ rnf [s, s_even, s_odd] - evaluate $ rnf [elems_rev, elems_asc, elems_desc] + evaluate $ rnf [elems_rev, elems_asc, elems_desc, elems_random] defaultMain [ bench "member" $ whnf (member elems) s , bench "insert" $ whnf (ins elems) S.empty + , bench "insert random" $ whnf (ins elems_random) S.empty , bench "map" $ whnf (S.map (+ 1)) s , bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s , bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s , bench "delete" $ whnf (del elems) s + , bench "delete random" $ whnf (del elems_random) s , bench "findMin" $ whnf S.findMin s , bench "findMax" $ whnf S.findMax s , bench "deleteMin" $ whnf S.deleteMin s @@ -65,6 +69,8 @@ main = do elems_asc = map (`div` 2) [1..bound] -- [0,1,1,2,2..] elems_desc = map (`div` 2) [bound,bound-1..1] -- [..2,2,1,1,0] strings = map show elems + stdGen = mkStdGen 42 + elems_random = shuffle stdGen elems member :: [Int] -> S.Set Int -> Int member xs s = foldl' (\n x -> if S.member x s then n + 1 else n) 0 xs diff --git a/containers-tests/benchmarks/Utils/Random.hs b/containers-tests/benchmarks/Utils/Random.hs new file mode 100644 index 000000000..6628c20ba --- /dev/null +++ b/containers-tests/benchmarks/Utils/Random.hs @@ -0,0 +1,20 @@ +module Utils.Random + ( shuffle + ) where + +import Data.List (unfoldr) +import System.Random (RandomGen, randomR) +import qualified Data.Sequence as Seq + +-- | O(n log n) Fisher-Yates shuffle. +shuffle :: RandomGen g => g -> [a] -> [a] +shuffle g0 xs0 = unfoldr f (g0, Seq.fromList xs0) + where + f (g, xs) + | Seq.null xs = Nothing + | otherwise = Just (x, (g', xs')) + where + (i, g') = randomR (0, Seq.length xs - 1) g + x = Seq.index xs i + xs' = Seq.deleteAt i xs +{-# INLINABLE shuffle #-} diff --git a/containers-tests/containers-tests.cabal b/containers-tests/containers-tests.cabal index da7c76a0f..45527bc76 100644 --- a/containers-tests/containers-tests.cabal +++ b/containers-tests/containers-tests.cabal @@ -163,6 +163,11 @@ benchmark map-benchmarks hs-source-dirs: benchmarks main-is: Map.hs ghc-options: -O2 + build-depends: + random >=1.0 && <1.3 + + other-modules: + Utils.Random benchmark tree-benchmarks import: benchmark-deps, warnings @@ -180,7 +185,7 @@ benchmark sequence-benchmarks main-is: Sequence.hs ghc-options: -O2 build-depends: - random >=0 && <1.2 + random >=1.0 && <1.3 , transformers benchmark set-benchmarks @@ -190,6 +195,11 @@ benchmark set-benchmarks hs-source-dirs: benchmarks main-is: Set.hs ghc-options: -O2 + build-depends: + random >=1.0 && <1.3 + + other-modules: + Utils.Random benchmark graph-benchmarks import: benchmark-deps, warnings @@ -199,7 +209,7 @@ benchmark graph-benchmarks main-is: Graph.hs ghc-options: -O2 build-depends: - random >=0 && <1.2 + random >=1.0 && <1.3 benchmark set-operations-intmap import: benchmark-deps, warnings From 240d0e7d62bf5717c258d39d2b5a5ad74db9a9a4 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Tue, 22 Oct 2024 22:25:49 +0530 Subject: [PATCH 2/2] Inline the common case of balance functions --- containers/src/Data/Map/Internal.hs | 48 +++++++++++++++++++---------- containers/src/Data/Set/Internal.hs | 47 ++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index b230a574e..2112721ca 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -4205,7 +4205,14 @@ ratio = 2 -- It is only written in such a way that every node is pattern-matched only once. balance :: k -> a -> Map k a -> Map k a -> Map k a -balance k x l r = case l of +balance k x l r = case (l, r) of + (Bin ls _ _ _ _, Bin rs _ _ _ _) + | rs <= delta*ls && ls <= delta*rs -> Bin (1+ls+rs) k x l r + _ -> balance_ k x l r +{-# INLINE balance #-} -- See Note [Inlining balance] in Data.Set.Internal + +balance_ :: k -> a -> Map k a -> Map k a -> Map k a +balance_ k x l r = case l of Tip -> case r of Tip -> Bin 1 k x Tip Tip (Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r @@ -4229,13 +4236,12 @@ balance k x l r = case l of | rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr | otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr) (_, _) -> error "Failure in Data.Map.balance" - | ls > delta*rs -> case (ll, lr) of + | {- ls > delta*rs -} otherwise -> case (ll, lr) of (Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr) | lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r) | otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r) (_, _) -> error "Failure in Data.Map.balance" - | otherwise -> Bin (1+ls+rs) k x l r -{-# NOINLINE balance #-} +{-# NOINLINE balance_ #-} -- Functions balanceL and balanceR are specialised versions of balance. -- balanceL only checks whether the left subtree is too big, @@ -4244,7 +4250,14 @@ balance k x l r = case l of -- balanceL is called when left subtree might have been inserted to or when -- right subtree might have been deleted from. balanceL :: k -> a -> Map k a -> Map k a -> Map k a -balanceL k x l r = case r of +balanceL k x l r = case (l, r) of + (Bin ls _ _ _ _, Bin rs _ _ _ _) + | ls <= delta*rs -> Bin (1+ls+rs) k x l r + _ -> balanceL_ k x l r +{-# INLINE balanceL #-} -- See Note [Inlining balance] in Data.Set.Internal + +balanceL_ :: k -> a -> Map k a -> Map k a -> Map k a +balanceL_ k x l r = case r of Tip -> case l of Tip -> Bin 1 k x Tip Tip (Bin _ _ _ Tip Tip) -> Bin 2 k x l Tip @@ -4257,19 +4270,24 @@ balanceL k x l r = case r of (Bin rs _ _ _ _) -> case l of Tip -> Bin (1+rs) k x Tip r - (Bin ls lk lx ll lr) - | ls > delta*rs -> case (ll, lr) of + (Bin ls lk lx ll lr) -> case (ll, lr) of (Bin lls _ _ _ _, Bin lrs lrk lrx lrl lrr) | lrs < ratio*lls -> Bin (1+ls+rs) lk lx ll (Bin (1+rs+lrs) k x lr r) | otherwise -> Bin (1+ls+rs) lrk lrx (Bin (1+lls+size lrl) lk lx ll lrl) (Bin (1+rs+size lrr) k x lrr r) - (_, _) -> error "Failure in Data.Map.balanceL" - | otherwise -> Bin (1+ls+rs) k x l r -{-# NOINLINE balanceL #-} + (_, _) -> error "Failure in Data.Map.balanceL_" +{-# NOINLINE balanceL_ #-} -- balanceR is called when right subtree might have been inserted to or when -- left subtree might have been deleted from. balanceR :: k -> a -> Map k a -> Map k a -> Map k a -balanceR k x l r = case l of +balanceR k x l r = case (l, r) of + (Bin ls _ _ _ _, Bin rs _ _ _ _) + | rs <= delta*ls -> Bin (1+ls+rs) k x l r + _ -> balanceR_ k x l r +{-# INLINE balanceR #-} -- See Note [Inlining balance] in Data.Set.Internal + +balanceR_ :: k -> a -> Map k a -> Map k a -> Map k a +balanceR_ k x l r = case l of Tip -> case r of Tip -> Bin 1 k x Tip Tip (Bin _ _ _ Tip Tip) -> Bin 2 k x Tip r @@ -4282,14 +4300,12 @@ balanceR k x l r = case l of (Bin ls _ _ _ _) -> case r of Tip -> Bin (1+ls) k x l Tip - (Bin rs rk rx rl rr) - | rs > delta*ls -> case (rl, rr) of + (Bin rs rk rx rl rr) -> case (rl, rr) of (Bin rls rlk rlx rll rlr, Bin rrs _ _ _ _) | rls < ratio*rrs -> Bin (1+ls+rs) rk rx (Bin (1+ls+rls) k x l rl) rr | otherwise -> Bin (1+ls+rs) rlk rlx (Bin (1+ls+size rll) k x l rll) (Bin (1+rrs+size rlr) rk rx rlr rr) - (_, _) -> error "Failure in Data.Map.balanceR" - | otherwise -> Bin (1+ls+rs) k x l r -{-# NOINLINE balanceR #-} + (_, _) -> error "Failure in Data.Map.balanceR_" +{-# NOINLINE balanceR_ #-} {-------------------------------------------------------------------- diff --git a/containers/src/Data/Set/Internal.hs b/containers/src/Data/Set/Internal.hs index a55877501..00d0bd152 100644 --- a/containers/src/Data/Set/Internal.hs +++ b/containers/src/Data/Set/Internal.hs @@ -1847,10 +1847,30 @@ ratio = 2 -- balanceL only checks whether the left subtree is too big, -- balanceR only checks whether the right subtree is too big. +-- Note [Inlining balance] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- According to benchmarks, we benefit from inlining balanceL and balanceR. But +-- we don't want to cause code bloat from inlining these large functions too +-- much. As a compromise, we inline only one case: Both trees are Bins already +-- balanced with respect to each other. +-- +-- For typical use cases this is the most frequently applicable case. For +-- instance, for n inserts there may be O(n log n) calls to balanceL/balanceR +-- but at most O(n) of them actually require rebalancing. Benchmarks show that +-- inlining this case provides most of the potential benefits of inlining the +-- full function. + -- balanceL is called when left subtree might have been inserted to or when -- right subtree might have been deleted from. balanceL :: a -> Set a -> Set a -> Set a -balanceL x l r = case r of +balanceL x l r = case (l, r) of + (Bin ls _ _ _, Bin rs _ _ _) + | ls <= delta*rs -> Bin (1+ls+rs) x l r + _ -> balanceL_ x l r +{-# INLINE balanceL #-} -- See Note [Inlining balance] + +balanceL_ :: a -> Set a -> Set a -> Set a +balanceL_ x l r = case r of Tip -> case l of Tip -> Bin 1 x Tip Tip (Bin _ _ Tip Tip) -> Bin 2 x l Tip @@ -1863,19 +1883,24 @@ balanceL x l r = case r of (Bin rs _ _ _) -> case l of Tip -> Bin (1+rs) x Tip r - (Bin ls lx ll lr) - | ls > delta*rs -> case (ll, lr) of + (Bin ls lx ll lr) -> case (ll, lr) of (Bin lls _ _ _, Bin lrs lrx lrl lrr) | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r) | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r) - (_, _) -> error "Failure in Data.Set.balanceL" - | otherwise -> Bin (1+ls+rs) x l r -{-# NOINLINE balanceL #-} + (_, _) -> error "Failure in Data.Set.balanceL_" +{-# NOINLINE balanceL_ #-} -- balanceR is called when right subtree might have been inserted to or when -- left subtree might have been deleted from. balanceR :: a -> Set a -> Set a -> Set a -balanceR x l r = case l of +balanceR x l r = case (l, r) of + (Bin ls _ _ _, Bin rs _ _ _) + | rs <= delta*ls -> Bin (1+ls+rs) x l r + _ -> balanceR_ x l r +{-# INLINE balanceR #-} -- See Note [Inlining balance] + +balanceR_ :: a -> Set a -> Set a -> Set a +balanceR_ x l r = case l of Tip -> case r of Tip -> Bin 1 x Tip Tip (Bin _ _ Tip Tip) -> Bin 2 x Tip r @@ -1888,14 +1913,12 @@ balanceR x l r = case l of (Bin ls _ _ _) -> case r of Tip -> Bin (1+ls) x l Tip - (Bin rs rx rl rr) - | rs > delta*ls -> case (rl, rr) of + (Bin rs rx rl rr) -> case (rl, rr) of (Bin rls rlx rll rlr, Bin rrs _ _ _) | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr) - (_, _) -> error "Failure in Data.Set.balanceR" - | otherwise -> Bin (1+ls+rs) x l r -{-# NOINLINE balanceR #-} + (_, _) -> error "Failure in Data.Set.balanceR_" +{-# NOINLINE balanceR_ #-} {-------------------------------------------------------------------- The bin constructor maintains the size of the tree