From 636b8e8f4a8e10ecd4ad4cc97bbcc626b6d9c5df Mon Sep 17 00:00:00 2001 From: David Feuer Date: Tue, 2 Jul 2019 21:38:03 -0400 Subject: [PATCH 1/3] Speed up fromList for IntMap Make `fromList` and `fromListWithKey` for `IntMap` smarter. Rather than rebuilding the path from the root for each element, insert as many elements as possible into each subtree before backing out. --- containers/src/Data/IntMap/Internal.hs | 116 ++++++++++++++++-- containers/src/Data/IntMap/Strict/Internal.hs | 108 ++++++++++++++-- 2 files changed, 208 insertions(+), 16 deletions(-) diff --git a/containers/src/Data/IntMap/Internal.hs b/containers/src/Data/IntMap/Internal.hs index 3b8fbb446..29d49625d 100644 --- a/containers/src/Data/IntMap/Internal.hs +++ b/containers/src/Data/IntMap/Internal.hs @@ -3078,10 +3078,60 @@ foldlFB = foldlWithKey -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] fromList :: [(Key,a)] -> IntMap a -fromList xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insert k x t +fromList = insertAll Nil + +-- [Note: fromList] +-- +-- The obvious way to build a map from a list is just to fold over the list +-- inserting each entry into the accumulator map. The problem is that this +-- rebuilds the path from the root *every single time*. To avoid this, we +-- insert as many elements as we can into the current subtree, backing out +-- one level at a time when necessary. + +data Inserted a = Inserted !(IntMap a) ![(Key, a)] + +insertAll :: IntMap a -> [(Key, a)] -> IntMap a +insertAll m [] = m +insertAll m ((k,x) : kxs) + | Inserted m' r <- insertSome m k x kxs + = insertAll m' r + +-- | Insert at least one entry into an 'IntMap'. If others fit +-- inside, insert them too. Return the new map and remaining +-- values. +insertSome :: IntMap a -> Key -> a -> [(Key, a)] -> Inserted a +insertSome t@(Bin p m l r) !k x kxs + | nomatch k p m + = insertMany (link k (Tip k x) p t) kxs + + | zero k m + , Inserted l' kxs' <- insertSome l k x kxs + = insertMany (Bin p m l' r) kxs' + + | Inserted r' kxs' <- insertSome r k x kxs + = insertMany (Bin p m l r') kxs' + +insertSome t@(Tip ky _) k x kxs + | k == ky + = insertMany (Tip k x) kxs + | otherwise + = insertMany (link k (Tip k x) ky t) kxs + +insertSome Nil k x kxs = insertMany (Tip k x) kxs + +-- | Try to insert some entries into an 'IntMap', but only if +-- they fit +insertMany :: IntMap a -> [(Key, a)] -> Inserted a +insertMany t [] = Inserted t [] +insertMany t@(Bin p m _ _) kxs@((k, x) : kxs') + | nomatch k p m + = Inserted t kxs + | otherwise + = insertSome t k x kxs' +insertMany t@(Tip ky _) kxs@((k, x) : kxs') + | k==ky = insertMany (Tip k x) kxs' + | otherwise = Inserted t kxs +insertMany Nil kxs = Inserted Nil kxs -- | /O(n*min(n,W))/. Create a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -3099,10 +3149,60 @@ fromListWith f xs -- > fromListWithKey f [] == empty fromListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t +-- See [Note: fromList] +fromListWithKey f = insertAllWithKey f Nil + +insertAllWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> [(Key, a)] -> IntMap a +insertAllWithKey _f m [] = m +insertAllWithKey f m ((k,x) : kxs) + | Inserted m' r <- insertSomeWithKey f m k x kxs + = insertAllWithKey f m' r + +-- | Insert at least one entry into an 'IntMap'. If others fit +-- inside, insert them too. Return the new map and remaining +-- values. +insertSomeWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> Key -> a -> [(Key, a)] -> Inserted a +insertSomeWithKey f t@(Bin p m l r) !k x kxs + | nomatch k p m + = insertManyWithKey f (link k (Tip k x) p t) kxs + + | zero k m + , Inserted l' kxs' <- insertSomeWithKey f l k x kxs + = insertManyWithKey f (Bin p m l' r) kxs' + + | Inserted r' kxs' <- insertSomeWithKey f r k x kxs + = insertManyWithKey f (Bin p m l r') kxs' + +insertSomeWithKey f t@(Tip ky y) k !x kxs + | k == ky + , y' <- f k x y + = insertManyWithKey f (Tip k y') kxs + | otherwise + = insertManyWithKey f (link k (Tip k x) ky t) kxs + +insertSomeWithKey f Nil k x kxs = insertManyWithKey f (Tip k x) kxs + +-- | Try to insert some entries into an 'IntMap', but only if +-- they fit +insertManyWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> [(Key, a)] -> Inserted a +insertManyWithKey _f t [] = Inserted t [] +insertManyWithKey f t@(Bin p m _ _) kxs@((k, x) : kxs') + | nomatch k p m + = Inserted t kxs + | otherwise + = insertSomeWithKey f t k x kxs' +insertManyWithKey f t@(Tip ky y) kxs@((k, x) : kxs') + | k==ky + , y' <- f k x y + = insertManyWithKey f (Tip k y') kxs' + | otherwise = Inserted t kxs +insertManyWithKey _f Nil kxs = Inserted Nil kxs -- | /O(n)/. Build a map from a list of key\/value pairs where -- the keys are in ascending order. diff --git a/containers/src/Data/IntMap/Strict/Internal.hs b/containers/src/Data/IntMap/Strict/Internal.hs index 9957a041f..c11bd7369 100644 --- a/containers/src/Data/IntMap/Strict/Internal.hs +++ b/containers/src/Data/IntMap/Strict/Internal.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE PatternGuards #-} #include "containers.h" @@ -1066,10 +1067,52 @@ fromSet f (IntSet.Tip kx bm) = buildTree f kx bm (IntSet.suffixBitMask + 1) -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] fromList :: [(Key,a)] -> IntMap a -fromList xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insert k x t +fromList = insertAll Nil + +data Inserted a = Inserted !(IntMap a) ![(Key, a)] + +insertAll :: IntMap a -> [(Key, a)] -> IntMap a +insertAll m [] = m +insertAll m ((k,x) : kxs) + | Inserted m' r <- insertSome m k x kxs + = insertAll m' r + +-- | Insert at least one entry into an 'IntMap'. If others fit +-- inside, insert them too. Return the new map and remaining +-- values. +insertSome :: IntMap a -> Key -> a -> [(Key, a)] -> Inserted a +insertSome t@(Bin p m l r) !k x kxs + | nomatch k p m + = insertMany (link k (Tip k x) p t) kxs + + | zero k m + , Inserted l' kxs' <- insertSome l k x kxs + = insertMany (Bin p m l' r) kxs' + + | Inserted r' kxs' <- insertSome r k x kxs + = insertMany (Bin p m l r') kxs' + +insertSome t@(Tip ky _) k !x kxs + | k == ky + = insertMany (Tip k x) kxs + | otherwise + = insertMany (link k (Tip k x) ky t) kxs + +insertSome Nil k x kxs = insertMany (Tip k x) kxs + +-- | Try to insert some entries into an 'IntMap', but only if +-- they fit +insertMany :: IntMap a -> [(Key, a)] -> Inserted a +insertMany t [] = Inserted t [] +insertMany t@(Bin p m _ _) kxs@((k, x) : kxs') + | nomatch k p m + = Inserted t kxs + | otherwise + = insertSome t k x kxs' +insertMany t@(Tip ky _) kxs@((k, x) : kxs') + | k==ky = x `seq` insertMany (Tip k x) kxs' + | otherwise = Inserted t kxs +insertMany Nil kxs = Inserted Nil kxs -- | /O(n*min(n,W))/. Create a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -1086,10 +1129,59 @@ fromListWith f xs -- > fromListWith (++) [] == empty fromListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t +fromListWithKey f = insertAllWithKey f Nil + +insertAllWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> [(Key, a)] -> IntMap a +insertAllWithKey _f m [] = m +insertAllWithKey f m ((k,x) : kxs) + | Inserted m' r <- insertSomeWithKey f m k x kxs + = insertAllWithKey f m' r + +-- | Insert at least one entry into an 'IntMap'. If others fit +-- inside, insert them too. Return the new map and remaining +-- values. +insertSomeWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> Key -> a -> [(Key, a)] -> Inserted a +insertSomeWithKey f t@(Bin p m l r) !k x kxs + | nomatch k p m + = insertManyWithKey f (link k (Tip k x) p t) kxs + + | zero k m + , Inserted l' kxs' <- insertSomeWithKey f l k x kxs + = insertManyWithKey f (Bin p m l' r) kxs' + + | Inserted r' kxs' <- insertSomeWithKey f r k x kxs + = insertManyWithKey f (Bin p m l r') kxs' + +insertSomeWithKey f t@(Tip ky y) k x kxs + | k == ky + , !y' <- f k x y + = insertManyWithKey f (Tip k y') kxs + | otherwise + = x `seq` insertManyWithKey f (link k (Tip k x) ky t) kxs + +insertSomeWithKey f Nil k x kxs = x `seq` insertManyWithKey f (Tip k x) kxs + +-- | Try to insert some entries into an 'IntMap', but only if +-- they fit +insertManyWithKey + :: (Key -> a -> a -> a) + -> IntMap a -> [(Key, a)] -> Inserted a +insertManyWithKey _f t [] = Inserted t [] +insertManyWithKey f t@(Bin p m _ _) kxs@((k, x) : kxs') + | nomatch k p m + = Inserted t kxs + | otherwise + = insertSomeWithKey f t k x kxs' +insertManyWithKey f t@(Tip ky y) kxs@((k, x) : kxs') + | k==ky + , !y' <- f k x y + = insertManyWithKey f (Tip k y') kxs' + | otherwise = Inserted t kxs +insertManyWithKey _f Nil kxs = Inserted Nil kxs -- | /O(n)/. Build a map from a list of key\/value pairs where -- the keys are in ascending order. From 6ab738d28214843af3e89203a6030f3b26d14a10 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Wed, 3 Jul 2019 15:30:28 -0400 Subject: [PATCH 2/3] Clean up a bit --- containers/src/Data/IntMap/Internal.hs | 27 +++++++++++-------- containers/src/Data/IntMap/Strict/Internal.hs | 9 ++++--- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/containers/src/Data/IntMap/Internal.hs b/containers/src/Data/IntMap/Internal.hs index 29d49625d..b973d8e1b 100644 --- a/containers/src/Data/IntMap/Internal.hs +++ b/containers/src/Data/IntMap/Internal.hs @@ -3079,6 +3079,9 @@ foldlFB = foldlWithKey fromList :: [(Key,a)] -> IntMap a fromList = insertAll Nil +-- GHC wants to inline this, because it's tiny, but that doesn't accomplish +-- anything because it expands to a recursive function. +{-# NOINLINE fromList #-} -- [Note: fromList] -- @@ -3096,9 +3099,9 @@ insertAll m ((k,x) : kxs) | Inserted m' r <- insertSome m k x kxs = insertAll m' r --- | Insert at least one entry into an 'IntMap'. If others fit --- inside, insert them too. Return the new map and remaining --- values. +-- | Insert at least one entry into an 'IntMap' or subtree. If +-- others fit in the same resulting subtree, insert them too. +-- Return the new map and remaining values. insertSome :: IntMap a -> Key -> a -> [(Key, a)] -> Inserted a insertSome t@(Bin p m l r) !k x kxs | nomatch k p m @@ -3119,8 +3122,9 @@ insertSome t@(Tip ky _) k x kxs insertSome Nil k x kxs = insertMany (Tip k x) kxs --- | Try to insert some entries into an 'IntMap', but only if --- they fit + +-- | Try to insert some entries into a subtree of an 'IntMap'. If +-- they belong in some other subtree, just don't insert them. insertMany :: IntMap a -> [(Key, a)] -> Inserted a insertMany t [] = Inserted t [] insertMany t@(Bin p m _ _) kxs@((k, x) : kxs') @@ -3129,9 +3133,9 @@ insertMany t@(Bin p m _ _) kxs@((k, x) : kxs') | otherwise = insertSome t k x kxs' insertMany t@(Tip ky _) kxs@((k, x) : kxs') - | k==ky = insertMany (Tip k x) kxs' + | k==ky = insertSome t k x kxs' | otherwise = Inserted t kxs -insertMany Nil kxs = Inserted Nil kxs +insertMany Nil kxs = Inserted Nil kxs -- Unused case -- | /O(n*min(n,W))/. Create a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -3151,6 +3155,9 @@ fromListWith f xs fromListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a -- See [Note: fromList] fromListWithKey f = insertAllWithKey f Nil +-- GHC wants to inline this because it's tiny, but doing so is useless +-- because it inlines to a recursive function. +{-# NOINLINE fromListWithKey #-} insertAllWithKey :: (Key -> a -> a -> a) @@ -3197,10 +3204,8 @@ insertManyWithKey f t@(Bin p m _ _) kxs@((k, x) : kxs') = Inserted t kxs | otherwise = insertSomeWithKey f t k x kxs' -insertManyWithKey f t@(Tip ky y) kxs@((k, x) : kxs') - | k==ky - , y' <- f k x y - = insertManyWithKey f (Tip k y') kxs' +insertManyWithKey f t@(Tip ky _) kxs@((k, x) : kxs') + | k==ky = insertSomeWithKey f t k x kxs' | otherwise = Inserted t kxs insertManyWithKey _f Nil kxs = Inserted Nil kxs diff --git a/containers/src/Data/IntMap/Strict/Internal.hs b/containers/src/Data/IntMap/Strict/Internal.hs index c11bd7369..ac83bf207 100644 --- a/containers/src/Data/IntMap/Strict/Internal.hs +++ b/containers/src/Data/IntMap/Strict/Internal.hs @@ -1068,6 +1068,7 @@ fromSet f (IntSet.Tip kx bm) = buildTree f kx bm (IntSet.suffixBitMask + 1) fromList :: [(Key,a)] -> IntMap a fromList = insertAll Nil +{-# NOINLINE fromList #-} data Inserted a = Inserted !(IntMap a) ![(Key, a)] @@ -1110,7 +1111,7 @@ insertMany t@(Bin p m _ _) kxs@((k, x) : kxs') | otherwise = insertSome t k x kxs' insertMany t@(Tip ky _) kxs@((k, x) : kxs') - | k==ky = x `seq` insertMany (Tip k x) kxs' + | k==ky = insertSome t k x kxs' | otherwise = Inserted t kxs insertMany Nil kxs = Inserted Nil kxs @@ -1130,6 +1131,7 @@ fromListWith f xs fromListWithKey :: (Key -> a -> a -> a) -> [(Key,a)] -> IntMap a fromListWithKey f = insertAllWithKey f Nil +{-# NOINLINE fromListWithKey #-} insertAllWithKey :: (Key -> a -> a -> a) @@ -1176,10 +1178,9 @@ insertManyWithKey f t@(Bin p m _ _) kxs@((k, x) : kxs') = Inserted t kxs | otherwise = insertSomeWithKey f t k x kxs' -insertManyWithKey f t@(Tip ky y) kxs@((k, x) : kxs') +insertManyWithKey f t@(Tip ky _) kxs@((k, x) : kxs') | k==ky - , !y' <- f k x y - = insertManyWithKey f (Tip k y') kxs' + = insertSomeWithKey f t k x kxs' | otherwise = Inserted t kxs insertManyWithKey _f Nil kxs = Inserted Nil kxs From 7507b3c0b4045fb08aabc87ab8fd70d7d7118e1d Mon Sep 17 00:00:00 2001 From: David Feuer Date: Wed, 3 Jul 2019 16:27:24 -0400 Subject: [PATCH 3/3] Improve fromList for IntSet Make `fromList` for `IntSet` better for partially sorted input. Performance seems to be similar to the old implementation for random input, but nearly as fast as `fromDistinctAscList` for sorted or reverse sorted input. There are pathological cases where the new implementation is significantly but not horribly slower than the old. In particular, I noticed that ```haskell iterate (\n -> (n ^ 2) `rem` (2^32-1)) 20 ``` is pretty bad for the new implementation for some reason. --- containers/src/Data/IntSet/Internal.hs | 55 ++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/containers/src/Data/IntSet/Internal.hs b/containers/src/Data/IntSet/Internal.hs index 3bc157ba1..53c0642d0 100644 --- a/containers/src/Data/IntSet/Internal.hs +++ b/containers/src/Data/IntSet/Internal.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE PatternGuards #-} #if __GLASGOW_HASKELL__ {-# LANGUAGE MagicHash, DeriveDataTypeable, StandaloneDeriving #-} #endif @@ -1052,10 +1053,56 @@ foldlFB = foldl -- | /O(n*min(n,W))/. Create a set from a list of integers. fromList :: [Key] -> IntSet -fromList xs - = Foldable.foldl' ins empty xs - where - ins t x = insert x t +-- See [Note: fromList] in Data.IntMap.Internal +fromList = insertAll Nil +{-# NOINLINE fromList #-} + +data Inserted = Inserted !IntSet ![Key] + +insertAll :: IntSet -> [Key] -> IntSet +insertAll m [] = m +insertAll m (k : kxs) + | Inserted m' r <- insertSome m (prefixOf k) (bitmapOf k) kxs + = insertAll m' r + +-- | Insert at least one entry into an 'IntSet' or subtree. If +-- others fit in the same resulting subtree, insert them too. +-- Return the new set and remaining values. +insertSome :: IntSet -> Prefix -> BitMap -> [Key] -> Inserted +insertSome t@(Bin p m l r) !k !x kxs + | nomatch k p m + = insertMany (link k (Tip k x) p t) kxs + + | zero k m + , Inserted l' kxs' <- insertSome l k x kxs + = insertMany (Bin p m l' r) kxs' + + | Inserted r' kxs' <- insertSome r k x kxs + = insertMany (Bin p m l r') kxs' + +insertSome t@(Tip ky y) k x kxs + | k == ky + = insertMany (Tip k (x .|. y)) kxs + | otherwise + = insertMany (link k (Tip k x) ky t) kxs + +insertSome Nil k x kxs = insertMany (Tip k x) kxs + +-- | Try to insert some entries into a subtree of an 'IntMap'. If +-- they belong in some other subtree, just don't insert them. +insertMany :: IntSet -> [Key] -> Inserted +insertMany t [] = Inserted t [] +insertMany t@(Bin p m _ _) kxs@(kx : kxs') + | nomatch (prefixOf kx) p m + = Inserted t kxs + | otherwise + = insertSome t (prefixOf kx) (bitmapOf kx) kxs' +insertMany t@(Tip ky _) kxs@(kx : kxs') + | prefixOf kx==ky + = insertSome t (prefixOf kx) (bitmapOf kx) kxs' + | otherwise + = Inserted t kxs +insertMany Nil kxs = Inserted Nil kxs -- Unused case -- | /O(n)/. Build a set from an ascending list of elements. -- /The precondition (input list is ascending) is not checked./