-
Notifications
You must be signed in to change notification settings - Fork 103
Use tree-diffing for difference
#535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 47 commits
03c9060
a0c47c5
35b4184
ac613a3
b067be0
5d54f66
894a90b
bf7c61f
36453d7
666172b
29fb432
78806a6
c4fa391
bd31e60
797d699
5249e70
88209ac
13cf308
df285a4
697dffe
cb46d24
521b5c6
4e734aa
d001d13
ed698ad
a5a45ec
0097077
4ae077d
98303d7
51de74d
de3288f
abbdc16
f8a6251
36c7cb3
7557ad8
452fe59
7a3397f
b800f86
0284cf4
e645b87
24c2522
ecb222e
3323108
8aed3e0
5af22f6
4afd2eb
8d388a1
75cb4ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -163,6 +163,7 @@ import Data.Functor.Identity (Identity (..)) | |
| import Data.Hashable (Hashable) | ||
| import Data.Hashable.Lifted (Hashable1, Hashable2) | ||
| import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare) | ||
| import Data.Maybe (isNothing) | ||
| import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid) | ||
| import GHC.Exts (Int (..), Int#, TYPE, (==#)) | ||
| import GHC.Stack (HasCallStack) | ||
|
|
@@ -1102,56 +1103,60 @@ delete k m = delete' (hash k) k m | |
| {-# INLINABLE delete #-} | ||
|
|
||
| delete' :: Eq k => Hash -> k -> HashMap k v -> HashMap k v | ||
| delete' h0 k0 m0 = go h0 k0 0 m0 | ||
| where | ||
| go !_ !_ !_ Empty = Empty | ||
| go h k _ t@(Leaf hy (L ky _)) | ||
| | hy == h && ky == k = Empty | ||
| | otherwise = t | ||
| go h k s t@(BitmapIndexed b ary) | ||
| | b .&. m == 0 = t | ||
| | otherwise = | ||
| let !st = A.index ary i | ||
| !st' = go h k (nextShift s) st | ||
| in if st' `ptrEq` st | ||
| then t | ||
| else case st' of | ||
| Empty | A.length ary == 1 -> Empty | ||
| | A.length ary == 2 -> | ||
| case (i, A.index ary 0, A.index ary 1) of | ||
| (0, _, l) | isLeafOrCollision l -> l | ||
| (1, l, _) | isLeafOrCollision l -> l | ||
| _ -> bIndexed | ||
| | otherwise -> bIndexed | ||
| where | ||
| bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) | ||
| l | isLeafOrCollision l && A.length ary == 1 -> l | ||
| _ -> BitmapIndexed b (A.update ary i st') | ||
| where m = mask h s | ||
| i = sparseIndex b m | ||
| go h k s t@(Full ary) = | ||
| let !st = A.index ary i | ||
| !st' = go h k (nextShift s) st | ||
| delete' h0 k0 m0 = deleteSubTree h0 k0 0 m0 | ||
| {-# INLINABLE delete' #-} | ||
|
|
||
| -- | This version of 'delete' can be used on subtrees when a the | ||
| -- corresponding 'Shift' argument is supplied. | ||
| deleteSubTree :: Eq k => Hash -> k -> Shift -> HashMap k v -> HashMap k v | ||
| deleteSubTree !_ !_ !_ Empty = Empty | ||
| deleteSubTree h k _ t@(Leaf hy (L ky _)) | ||
| | hy == h && ky == k = Empty | ||
| | otherwise = t | ||
| deleteSubTree h k s t@(BitmapIndexed b ary) | ||
| | b .&. m == 0 = t | ||
| | otherwise = | ||
| let !st = A.index ary i | ||
| !st' = deleteSubTree h k (nextShift s) st | ||
| in if st' `ptrEq` st | ||
| then t | ||
| else case st' of | ||
| Empty -> | ||
| let ary' = A.delete ary i | ||
| bm = fullBitmap .&. complement (1 `unsafeShiftL` i) | ||
| in BitmapIndexed bm ary' | ||
| _ -> Full (A.update ary i st') | ||
| where i = index h s | ||
| go h k _ t@(Collision hy v) | ||
| | h == hy = case indexOf k v of | ||
| Just i | ||
| | A.length v == 2 -> | ||
| if i == 0 | ||
| then Leaf h (A.index v 1) | ||
| else Leaf h (A.index v 0) | ||
| | otherwise -> Collision h (A.delete v i) | ||
| Nothing -> t | ||
| | otherwise = t | ||
| {-# INLINABLE delete' #-} | ||
| Empty | A.length ary == 1 -> Empty | ||
| | A.length ary == 2 -> | ||
| case (i, A.index ary 0, A.index ary 1) of | ||
| (0, _, l) | isLeafOrCollision l -> l | ||
| (1, l, _) | isLeafOrCollision l -> l | ||
| _ -> bIndexed | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't it always 0 or 1, making the third branch unreachable? I suspect this part can be cleaned up by using
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The third branch will be used when I have opened #528 to clean up this pattern, and I'll eventually return to that PR! :) |
||
| | otherwise -> bIndexed | ||
| where | ||
| bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) | ||
| l | isLeafOrCollision l && A.length ary == 1 -> l | ||
| _ -> BitmapIndexed b (A.update ary i st') | ||
| where m = mask h s | ||
| i = sparseIndex b m | ||
| deleteSubTree h k s t@(Full ary) = | ||
| let !st = A.index ary i | ||
| !st' = deleteSubTree h k (nextShift s) st | ||
| in if st' `ptrEq` st | ||
| then t | ||
| else case st' of | ||
| Empty -> | ||
| let ary' = A.delete ary i | ||
| bm = fullBitmap .&. complement (1 `unsafeShiftL` i) | ||
| in BitmapIndexed bm ary' | ||
| _ -> Full (updateFullArray ary i st') | ||
| where i = index h s | ||
| deleteSubTree h k _ t@(Collision hy v) | ||
| | h == hy = case indexOf k v of | ||
| Just i | ||
| | A.length v == 2 -> | ||
| if i == 0 | ||
| then Leaf h (A.index v 1) | ||
| else Leaf h (A.index v 0) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just do
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can. But that's what's #528 is about. |
||
| | otherwise -> Collision h (A.delete v i) | ||
| Nothing -> t | ||
| | otherwise = t | ||
| {-# INLINABLE deleteSubTree #-} | ||
|
|
||
| -- | Delete optimized for the case when we know the key is in the map. | ||
| -- | ||
|
|
@@ -1188,7 +1193,7 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 | |
| let ary' = A.delete ary i | ||
| bm = fullBitmap .&. complement (1 `unsafeShiftL` i) | ||
| in BitmapIndexed bm ary' | ||
| _ -> Full (A.update ary i st') | ||
| _ -> Full (updateFullArray ary i st') | ||
| where i = indexSH shiftedHash | ||
| go collPos _shiftedHash _k (Collision h v) | ||
| | A.length v == 2 | ||
|
|
@@ -1780,14 +1785,131 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] | |
|
|
||
| -- | \(O(n \log m)\) Difference of two maps. Return elements of the first map | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the complexity is still the same. For example, if we do something like |
||
| -- not existing in the second. | ||
| difference :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v | ||
| difference a b = foldlWithKey' go empty a | ||
| difference :: Eq k => HashMap k v -> HashMap k w -> HashMap k v | ||
| difference = go_difference 0 | ||
| where | ||
| go m k v = case lookup k b of | ||
| Nothing -> unsafeInsert k v m | ||
| _ -> m | ||
| go_difference !_s Empty _ = Empty | ||
| go_difference s t1@(Leaf h1 (L k1 _)) t2 | ||
| = lookupCont (\_ -> t1) (\_ _ -> Empty) h1 k1 s t2 | ||
| go_difference _ t1 Empty = t1 | ||
| go_difference s t1 (Leaf h2 (L k2 _)) = deleteSubTree h2 k2 s t1 | ||
|
|
||
| go_difference s t1@(BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) | ||
| = differenceArrays s b1 ary1 t1 b2 ary2 | ||
| go_difference s t1@(Full ary1) (BitmapIndexed b2 ary2) | ||
| = differenceArrays s fullBitmap ary1 t1 b2 ary2 | ||
| go_difference s t1@(BitmapIndexed b1 ary1) (Full ary2) | ||
| = differenceArrays s b1 ary1 t1 fullBitmap ary2 | ||
| go_difference s t1@(Full ary1) (Full ary2) | ||
| = differenceArrays s fullBitmap ary1 t1 fullBitmap ary2 | ||
|
|
||
| go_difference s t1@(Collision h1 _) (BitmapIndexed b2 ary2) | ||
| | b2 .&. m == 0 = t1 | ||
| | otherwise = | ||
| case A.index# ary2 (sparseIndex b2 m) of | ||
| (# st2 #) -> go_difference (nextShift s) t1 st2 | ||
| where m = mask h1 s | ||
| go_difference s t1@(Collision h1 _) (Full ary2) | ||
| = case A.index# ary2 (index h1 s) of | ||
| (# st2 #) -> go_difference (nextShift s) t1 st2 | ||
|
|
||
| go_difference s t1@(BitmapIndexed b1 ary1) t2@(Collision h2 _) | ||
| | b1 .&. m == 0 = t1 | ||
| | otherwise = | ||
| let (# !st #) = A.index# ary1 i1 | ||
| in case go_difference (nextShift s) st t2 of | ||
| Empty {- | A.length ary1 == 1 -> Empty -- Impossible! -} | ||
| | A.length ary1 == 2 -> | ||
| case (i1, A.index ary1 0, A.index ary1 1) of | ||
| (0, _, l) | isLeafOrCollision l -> l | ||
| (1, l, _) | isLeafOrCollision l -> l | ||
| _ -> bIndexed | ||
| | otherwise -> bIndexed | ||
| where | ||
| bIndexed | ||
| = BitmapIndexed (b1 .&. complement m) (A.delete ary1 i1) | ||
| st' | isLeafOrCollision st' && A.length ary1 == 1 -> st' | ||
| | st `ptrEq` st' -> t1 | ||
| | otherwise -> BitmapIndexed b1 (A.update ary1 i1 st') | ||
| where | ||
| m = mask h2 s | ||
| i1 = sparseIndex b1 m | ||
| go_difference s t1@(Full ary1) t2@(Collision h2 _) | ||
| = let (# !st #) = A.index# ary1 i | ||
| in case go_difference (nextShift s) st t2 of | ||
| Empty -> | ||
| let ary1' = A.delete ary1 i | ||
| bm = fullBitmap .&. complement (1 `unsafeShiftL` i) | ||
| in BitmapIndexed bm ary1' | ||
| st' | st `ptrEq` st' -> t1 | ||
| | otherwise -> Full (updateFullArray ary1 i st') | ||
| where i = index h2 s | ||
|
|
||
| go_difference _ t1@(Collision h1 ary1) (Collision h2 ary2) | ||
| = differenceCollisions h1 ary1 t1 h2 ary2 | ||
|
|
||
| -- TODO: If we keep 'Full' (#399), differenceArrays could be optimized for | ||
| -- each combination of 'Full' and 'BitmapIndexed`. | ||
| differenceArrays !s !b1 !ary1 t1 !b2 !ary2 | ||
| | b1 .&. b2 == 0 = t1 | ||
| | A.unsafeSameArray ary1 ary2 = Empty | ||
| | otherwise = runST $ do | ||
| mary <- A.new_ $ A.length ary1 | ||
|
|
||
| -- TODO: i == popCount bResult. Not sure if that would be faster. | ||
| -- Also i1 is in some relation with b1' | ||
| let goDA !i !i1 !b1' !bResult !nChanges | ||
| | b1' == 0 = pure (bResult, nChanges) | ||
| | otherwise = do | ||
| !st1 <- A.indexM ary1 i1 | ||
| case m .&. b2 of | ||
| 0 -> do | ||
| A.write mary i st1 | ||
| goDA (i + 1) (i1 + 1) nextB1' (bResult .|. m) nChanges | ||
| _ -> do | ||
| !st2 <- A.indexM ary2 (sparseIndex b2 m) | ||
| case go_difference (nextShift s) st1 st2 of | ||
| Empty -> goDA i (i1 + 1) nextB1' bResult (nChanges + 1) | ||
| st -> do | ||
| A.write mary i st | ||
| let same = I# (Exts.reallyUnsafePtrEquality# st st1) | ||
| let nChanges' = nChanges + (1 - same) | ||
| goDA (i + 1) (i1 + 1) nextB1' (bResult .|. m) nChanges' | ||
| where | ||
| m = b1' .&. negate b1' | ||
| nextB1' = b1' .&. complement m | ||
|
|
||
| (bResult, nChanges) <- goDA 0 0 b1 0 0 | ||
| if nChanges == 0 | ||
| then pure t1 | ||
| else case popCount bResult of | ||
| 0 -> pure Empty | ||
| 1 -> do | ||
| l <- A.read mary 0 | ||
| if isLeafOrCollision l | ||
| then pure l | ||
| else BitmapIndexed bResult <$> (A.unsafeFreeze =<< A.shrink mary 1) | ||
| n -> bitmapIndexedOrFull bResult <$> (A.unsafeFreeze =<< A.shrink mary n) | ||
| {-# INLINABLE difference #-} | ||
|
|
||
| -- TODO: This could be faster if we would keep track of which elements of ary2 | ||
| -- we've already matched. Those could be skipped when we check the following | ||
| -- elements of ary1. | ||
| differenceCollisions :: Eq k => Hash -> A.Array (Leaf k v1) -> HashMap k v1 -> Hash -> A.Array (Leaf k v2) -> HashMap k v1 | ||
| differenceCollisions !h1 !ary1 t1 !h2 !ary2 | ||
| | h1 == h2 = | ||
| if A.unsafeSameArray ary1 ary2 | ||
| then Empty | ||
| else let ary = A.filter (\(L k1 _) -> isNothing (indexOf k1 ary2)) ary1 | ||
| in case A.length ary of | ||
| 0 -> Empty | ||
| 1 -> case A.index# ary 0 of | ||
| (# l #) -> Leaf h1 l | ||
| n | A.length ary1 == n -> t1 | ||
| | otherwise -> Collision h1 ary | ||
| | otherwise = t1 | ||
| {-# INLINABLE differenceCollisions #-} | ||
|
|
||
| -- | \(O(n \log m)\) Difference with a combining function. When two equal keys are | ||
| -- encountered, the combining function is applied to the values of these keys. | ||
| -- If it returns 'Nothing', the element is discarded (proper set difference). If | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.