Skip to content
Merged
Show file tree
Hide file tree
Changes from 47 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
03c9060
WIP: Tree-diffing `difference`
sjakobi Oct 24, 2025
a0c47c5
Progress on differenceArrays
sjakobi Oct 24, 2025
35b4184
Add comment
sjakobi Oct 24, 2025
ac613a3
Naming
sjakobi Oct 24, 2025
b067be0
WIP: differenceCollisions
sjakobi Oct 24, 2025
5d54f66
WIP
sjakobi Oct 24, 2025
894a90b
Comment
sjakobi Oct 24, 2025
bf7c61f
Fix some bugs
sjakobi Oct 24, 2025
36453d7
Fix other bug
sjakobi Oct 24, 2025
666172b
Comment out dead alternative
sjakobi Oct 24, 2025
29fb432
Formatting
sjakobi Oct 24, 2025
78806a6
Wibble
sjakobi Oct 24, 2025
c4fa391
Remove Hashable constraint
sjakobi Oct 24, 2025
bd31e60
Add comment
sjakobi Oct 24, 2025
797d699
Small fixes and comments
sjakobi Oct 24, 2025
5249e70
Use updateFullArray where possible
sjakobi Oct 25, 2025
88209ac
Formatting
sjakobi Oct 25, 2025
13cf308
INLINE A.filter
sjakobi Oct 25, 2025
df285a4
Array.shrink: Allow shrinking to length 0
sjakobi Oct 25, 2025
697dffe
More comment
sjakobi Oct 25, 2025
cb46d24
Remove comment
sjakobi Oct 26, 2025
521b5c6
s/delete''/deleteSubTree
sjakobi Oct 26, 2025
4e734aa
Mark deleteSubTree INLINABLE
sjakobi Oct 26, 2025
d001d13
Make deleteSubTree properly self-recursive
sjakobi Oct 26, 2025
ed698ad
Remove comment
sjakobi Oct 26, 2025
a5a45ec
Move differenceArrays into difference
sjakobi Oct 26, 2025
0097077
s/fill/goDA
sjakobi Oct 26, 2025
4ae077d
Strictness
sjakobi Oct 26, 2025
98303d7
Remove unnecessary forcing
sjakobi Oct 26, 2025
51de74d
differenceArrays: No need to force t1
sjakobi Oct 26, 2025
de3288f
Revert "Remove unnecessary forcing"
sjakobi Oct 26, 2025
abbdc16
Remove some comments
sjakobi Oct 26, 2025
f8a6251
Add comment
sjakobi Oct 26, 2025
36c7cb3
differenceArrays: Track changes with Int instead of Bool
sjakobi Oct 26, 2025
7557ad8
Prevent unnecessary branch
sjakobi Oct 26, 2025
452fe59
Comments
sjakobi Oct 27, 2025
7a3397f
Cleanup
sjakobi Oct 27, 2025
b800f86
Remove comment
sjakobi Oct 27, 2025
0284cf4
Use reallyUnsafePtrEquality# for compatibility
sjakobi Oct 27, 2025
e645b87
Update comment
sjakobi Oct 27, 2025
24c2522
differenceCollisions: Add pointer equality check
sjakobi Oct 27, 2025
ecb222e
filter.go: Use strict patterns
sjakobi Oct 27, 2025
3323108
Elide redundant type signature
sjakobi Oct 27, 2025
8aed3e0
Remove outdated comment
sjakobi Oct 27, 2025
5af22f6
Remove INLINABLE pragma from difference.differenceArrays
sjakobi Oct 27, 2025
4afd2eb
difference*: Use A.index# instead of index
sjakobi Oct 28, 2025
8d388a1
Rename inner `go` functions
sjakobi Oct 28, 2025
75cb4ed
s/deleteSubTree/deleteFromSubtree
sjakobi Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 175 additions & 53 deletions Data/HashMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable)
import Data.Hashable.Lifted (Hashable1, Hashable2)
import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare)
import Data.Maybe (isNothing)
import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid)
import GHC.Exts (Int (..), Int#, TYPE, (==#))
import GHC.Stack (HasCallStack)
Expand Down Expand Up @@ -1102,56 +1103,60 @@ delete k m = delete' (hash k) k m
{-# INLINABLE delete #-}

delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v
delete' h0 k0 m0 = go h0 k0 0 m0
where
go !_ !_ !_ Empty = Empty
go h k _ t@(Leaf hy (L ky _))
| hy == h && ky == k = Empty
| otherwise = t
go h k s t@(BitmapIndexed b ary)
| b .&. m == 0 = t
| otherwise =
let !st = A.index ary i
!st' = go h k (nextShift s) st
in if st' `ptrEq` st
then t
else case st' of
Empty | A.length ary == 1 -> Empty
| A.length ary == 2 ->
case (i, A.index ary 0, A.index ary 1) of
(0, _, l) | isLeafOrCollision l -> l
(1, l, _) | isLeafOrCollision l -> l
_ -> bIndexed
| otherwise -> bIndexed
where
bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i)
l | isLeafOrCollision l && A.length ary == 1 -> l
_ -> BitmapIndexed b (A.update ary i st')
where m = mask h s
i = sparseIndex b m
go h k s t@(Full ary) =
let !st = A.index ary i
!st' = go h k (nextShift s) st
delete' h0 k0 m0 = deleteSubTree h0 k0 0 m0
{-# INLINABLE delete' #-}

-- | This version of 'delete' can be used on subtrees when a the
-- corresponding 'Shift' argument is supplied.
deleteSubTree :: Eq k => Hash -> k -> Shift -> HashMap k v -> HashMap k v
deleteSubTree !_ !_ !_ Empty = Empty
deleteSubTree h k _ t@(Leaf hy (L ky _))
| hy == h && ky == k = Empty
| otherwise = t
deleteSubTree h k s t@(BitmapIndexed b ary)
| b .&. m == 0 = t
| otherwise =
let !st = A.index ary i
!st' = deleteSubTree h k (nextShift s) st
in if st' `ptrEq` st
then t
else case st' of
Empty ->
let ary' = A.delete ary i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary'
_ -> Full (A.update ary i st')
where i = index h s
go h k _ t@(Collision hy v)
| h == hy = case indexOf k v of
Just i
| A.length v == 2 ->
if i == 0
then Leaf h (A.index v 1)
else Leaf h (A.index v 0)
| otherwise -> Collision h (A.delete v i)
Nothing -> t
| otherwise = t
{-# INLINABLE delete' #-}
Empty | A.length ary == 1 -> Empty
| A.length ary == 2 ->
case (i, A.index ary 0, A.index ary 1) of
(0, _, l) | isLeafOrCollision l -> l
(1, l, _) | isLeafOrCollision l -> l
_ -> bIndexed
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it always 0 or 1, making the third branch unreachable? I suspect this part can be cleaned up by using let l = ... instead of binding that in a case branch.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The third branch will be used when l isn't a Leaf or Collision.

I have opened #528 to clean up this pattern, and I'll eventually return to that PR! :)

| otherwise -> bIndexed
where
bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i)
l | isLeafOrCollision l && A.length ary == 1 -> l
_ -> BitmapIndexed b (A.update ary i st')
where m = mask h s
i = sparseIndex b m
deleteSubTree h k s t@(Full ary) =
let !st = A.index ary i
!st' = deleteSubTree h k (nextShift s) st
in if st' `ptrEq` st
then t
else case st' of
Empty ->
let ary' = A.delete ary i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary'
_ -> Full (updateFullArray ary i st')
where i = index h s
deleteSubTree h k _ t@(Collision hy v)
| h == hy = case indexOf k v of
Just i
| A.length v == 2 ->
if i == 0
then Leaf h (A.index v 1)
else Leaf h (A.index v 0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just do Leaf h (A.index v (1 - i))?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can. But that's what's #528 is about.

| otherwise -> Collision h (A.delete v i)
Nothing -> t
| otherwise = t
{-# INLINABLE deleteSubTree #-}

-- | Delete optimized for the case when we know the key is in the map.
--
Expand Down Expand Up @@ -1188,7 +1193,7 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0
let ary' = A.delete ary i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary'
_ -> Full (A.update ary i st')
_ -> Full (updateFullArray ary i st')
where i = indexSH shiftedHash
go collPos _shiftedHash _k (Collision h v)
| A.length v == 2
Expand Down Expand Up @@ -1780,14 +1785,131 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []

-- | \(O(n \log m)\) Difference of two maps. Return elements of the first map
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the complexity is still the same. For example, if we do something like difference a large_superset_of_a, we end up checking each leaf of a against the superset.

-- not existing in the second.
difference :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v
difference a b = foldlWithKey' go empty a
difference :: Eq k => HashMap k v -> HashMap k w -> HashMap k v
difference = go_difference 0
where
go m k v = case lookup k b of
Nothing -> unsafeInsert k v m
_ -> m
go_difference !_s Empty _ = Empty
go_difference s t1@(Leaf h1 (L k1 _)) t2
= lookupCont (\_ -> t1) (\_ _ -> Empty) h1 k1 s t2
go_difference _ t1 Empty = t1
go_difference s t1 (Leaf h2 (L k2 _)) = deleteSubTree h2 k2 s t1

go_difference s t1@(BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2)
= differenceArrays s b1 ary1 t1 b2 ary2
go_difference s t1@(Full ary1) (BitmapIndexed b2 ary2)
= differenceArrays s fullBitmap ary1 t1 b2 ary2
go_difference s t1@(BitmapIndexed b1 ary1) (Full ary2)
= differenceArrays s b1 ary1 t1 fullBitmap ary2
go_difference s t1@(Full ary1) (Full ary2)
= differenceArrays s fullBitmap ary1 t1 fullBitmap ary2

go_difference s t1@(Collision h1 _) (BitmapIndexed b2 ary2)
| b2 .&. m == 0 = t1
| otherwise =
case A.index# ary2 (sparseIndex b2 m) of
(# st2 #) -> go_difference (nextShift s) t1 st2
where m = mask h1 s
go_difference s t1@(Collision h1 _) (Full ary2)
= case A.index# ary2 (index h1 s) of
(# st2 #) -> go_difference (nextShift s) t1 st2

go_difference s t1@(BitmapIndexed b1 ary1) t2@(Collision h2 _)
| b1 .&. m == 0 = t1
| otherwise =
let (# !st #) = A.index# ary1 i1
in case go_difference (nextShift s) st t2 of
Empty {- | A.length ary1 == 1 -> Empty -- Impossible! -}
| A.length ary1 == 2 ->
case (i1, A.index ary1 0, A.index ary1 1) of
(0, _, l) | isLeafOrCollision l -> l
(1, l, _) | isLeafOrCollision l -> l
_ -> bIndexed
| otherwise -> bIndexed
where
bIndexed
= BitmapIndexed (b1 .&. complement m) (A.delete ary1 i1)
st' | isLeafOrCollision st' && A.length ary1 == 1 -> st'
| st `ptrEq` st' -> t1
| otherwise -> BitmapIndexed b1 (A.update ary1 i1 st')
where
m = mask h2 s
i1 = sparseIndex b1 m
go_difference s t1@(Full ary1) t2@(Collision h2 _)
= let (# !st #) = A.index# ary1 i
in case go_difference (nextShift s) st t2 of
Empty ->
let ary1' = A.delete ary1 i
bm = fullBitmap .&. complement (1 `unsafeShiftL` i)
in BitmapIndexed bm ary1'
st' | st `ptrEq` st' -> t1
| otherwise -> Full (updateFullArray ary1 i st')
where i = index h2 s

go_difference _ t1@(Collision h1 ary1) (Collision h2 ary2)
= differenceCollisions h1 ary1 t1 h2 ary2

-- TODO: If we keep 'Full' (#399), differenceArrays could be optimized for
-- each combination of 'Full' and 'BitmapIndexed`.
differenceArrays !s !b1 !ary1 t1 !b2 !ary2
| b1 .&. b2 == 0 = t1
| A.unsafeSameArray ary1 ary2 = Empty
| otherwise = runST $ do
mary <- A.new_ $ A.length ary1

-- TODO: i == popCount bResult. Not sure if that would be faster.
-- Also i1 is in some relation with b1'
let goDA !i !i1 !b1' !bResult !nChanges
| b1' == 0 = pure (bResult, nChanges)
| otherwise = do
!st1 <- A.indexM ary1 i1
case m .&. b2 of
0 -> do
A.write mary i st1
goDA (i + 1) (i1 + 1) nextB1' (bResult .|. m) nChanges
_ -> do
!st2 <- A.indexM ary2 (sparseIndex b2 m)
case go_difference (nextShift s) st1 st2 of
Empty -> goDA i (i1 + 1) nextB1' bResult (nChanges + 1)
st -> do
A.write mary i st
let same = I# (Exts.reallyUnsafePtrEquality# st st1)
let nChanges' = nChanges + (1 - same)
goDA (i + 1) (i1 + 1) nextB1' (bResult .|. m) nChanges'
where
m = b1' .&. negate b1'
nextB1' = b1' .&. complement m

(bResult, nChanges) <- goDA 0 0 b1 0 0
if nChanges == 0
then pure t1
else case popCount bResult of
0 -> pure Empty
1 -> do
l <- A.read mary 0
if isLeafOrCollision l
then pure l
else BitmapIndexed bResult <$> (A.unsafeFreeze =<< A.shrink mary 1)
n -> bitmapIndexedOrFull bResult <$> (A.unsafeFreeze =<< A.shrink mary n)
{-# INLINABLE difference #-}

-- TODO: This could be faster if we would keep track of which elements of ary2
-- we've already matched. Those could be skipped when we check the following
-- elements of ary1.
differenceCollisions :: Eq k => Hash -> A.Array (Leaf k v1) -> HashMap k v1 -> Hash -> A.Array (Leaf k v2) -> HashMap k v1
differenceCollisions !h1 !ary1 t1 !h2 !ary2
| h1 == h2 =
if A.unsafeSameArray ary1 ary2
then Empty
else let ary = A.filter (\(L k1 _) -> isNothing (indexOf k1 ary2)) ary1
in case A.length ary of
0 -> Empty
1 -> case A.index# ary 0 of
(# l #) -> Leaf h1 l
n | A.length ary1 == n -> t1
| otherwise -> Collision h1 ary
| otherwise = t1
{-# INLINABLE differenceCollisions #-}

-- | \(O(n \log m)\) Difference with a combining function. When two equal keys are
-- encountered, the combining function is applied to the values of these keys.
-- If it returns 'Nothing', the element is discarded (proper set difference). If
Expand Down
27 changes: 26 additions & 1 deletion Data/HashMap/Internal/Array.hs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ module Data.HashMap.Internal.Array
, thaw
, map
, map'
, filter
, traverse
, traverse'
, toList
Expand Down Expand Up @@ -113,12 +114,14 @@ import qualified Prelude
if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else
# define CHECK_OP(_func_,_op_,_lhs_,_rhs_) \
if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else
# define CHECK_GE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>=,_lhs_,_rhs_)
# define CHECK_GT(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>,_lhs_,_rhs_)
# define CHECK_LE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,<=,_lhs_,_rhs_)
# define CHECK_EQ(_func_,_lhs_,_rhs_) CHECK_OP(_func_,==,_lhs_,_rhs_)
#else
# define CHECK_BOUNDS(_func_,_len_,_k_)
# define CHECK_OP(_func_,_op_,_lhs_,_rhs_)
# define CHECK_GE(_func_,_lhs_,_rhs_)
# define CHECK_GT(_func_,_lhs_,_rhs_)
# define CHECK_LE(_func_,_lhs_,_rhs_)
# define CHECK_EQ(_func_,_lhs_,_rhs_)
Expand Down Expand Up @@ -221,7 +224,7 @@ new_ n = new n undefinedElem
-- | The returned array is the same as the array given, as it is shrunk in place.
shrink :: MArray s a -> Int -> ST s (MArray s a)
shrink mary _n@(I# n#) =
CHECK_GT("shrink", _n, (0 :: Int))
CHECK_GE("shrink", _n, (0 :: Int))
CHECK_LE("shrink", _n, (unsafeLengthM mary))
ST $ \s -> case Exts.shrinkSmallMutableArray# (unMArray mary) n# s of
s' -> (# s', mary #)
Expand Down Expand Up @@ -496,6 +499,28 @@ map' f = \ ary ->
go ary mary (i+1) n
{-# INLINE map' #-}

filter :: (a -> Bool) -> Array a -> Array a
filter f = \ ary ->
let !n = length ary
in run $ do
mary <- new_ n
len <- go_filter ary mary 0 0 n
shrink mary len
where
-- Without the @!@ on @ary@ we end up reboxing the array when using
-- 'differenceCollisions'. See
-- https://gitlab.haskell.org/ghc/ghc/-/issues/26525.
go_filter !ary !mary !iAry !iMary !n
| iAry >= n = return iMary
| otherwise = do
x <- indexM ary iAry
if f x
then do
write mary iMary x
go_filter ary mary (iAry + 1) (iMary + 1) n
else go_filter ary mary (iAry + 1) iMary n
{-# INLINE filter #-}

fromList :: Int -> [a] -> Array a
fromList n xs0 =
CHECK_EQ("fromList", n, Prelude.length xs0)
Expand Down