Merge pull request #7 from andyferris/ajf/Dictionaries.jl

andyferris · web-flow · commit 1f3cd80d0fa1 · 2020-01-02T13:48:11.000+10:00
Switch to Dictionaries.jl backend
diff --git a/Project.toml b/Project.toml
@@ -1,13 +1,19 @@
 authors = ["Andy Ferris <ferris.andy@gmail.com>"]
 name = "AcceleratedArrays"
 uuid = "44e12807-9a19-5591-91cf-c1b4fb89ce64"
-version = "0.2.2"
+version = "0.3.0"
 
 [deps]
 SplitApplyCombine = "03a91e81-4c3e-53e1-a0a4-9c0c8f19dd66"
+Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
 test = ["Test"]
+
+[compat]
+julia = "1"
+SplitApplyCombine = "1"
+Dictionaries = "0.2"
diff --git a/src/AcceleratedArrays.jl b/src/AcceleratedArrays.jl
@@ -1,8 +1,8 @@
 module AcceleratedArrays
 
-using SplitApplyCombine
+using SplitApplyCombine, Dictionaries
 
-using Base: @propagate_inbounds, Fix2, promote_op
+using Base: @propagate_inbounds, Fix2, promote_op, Callable
 
 export accelerate, accelerate!
 export AcceleratedArray, AcceleratedVector, AcceleratedMatrix, MaybeVector, SingleVector
diff --git a/src/HashIndex.jl b/src/HashIndex.jl
@@ -1,10 +1,10 @@
 # Hash table acceleration index
-struct HashIndex{D <: AbstractDict} <: AbstractIndex
+struct HashIndex{D <: HashDictionary} <: AbstractIndex
     dict::D
 end
 
 function HashIndex(a::AbstractArray)
-	dict = Dict{eltype(a), Vector{eltype(keys(a))}}()
+    dict = HashDictionary{eltype(a), Vector{eltype(keys(a))}}()
     
     @inbounds for i in keys(a)
         value = a[i]
@@ -24,85 +24,85 @@ Base.summary(i::HashIndex) = "HashIndex ($(length(i.dict)) unique element$(lengt
 Base.in(x, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}) = haskey(a.index.dict, x)
 
 function Base.count(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-    index = Base.ht_keyindex(a.index.dict, f.x)
-    if index < 0
-        return 0
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return length(@inbounds gettokenvalue(a.index.dict, token))
     else
-        return length(a.index.dict.vals[index])
+        return 0
     end
 end
 
 function Base.findall(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return Vector{eltype(keys(a))}()
-	else
-		return @inbounds a.index.dict.vals[index]
-	end
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return @inbounds gettokenvalue(a.index.dict, token)
+    else
+        return Vector{eltype(keys(a))}()
+    end
 end
 
 # TODO: findall for arbitrary predicates by just checking each unique key? (Sometimes faster, sometimes slower?)
 
 function Base.findfirst(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return nothing
-	else
-		return @inbounds first(a.index.dict.vals[index])
-	end
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return @inbounds first(gettokenvalue(a.index.dict, token))
+    else
+        return nothing
+    end
 end
 
 function Base.findlast(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return nothing
-	else
-		return @inbounds last(a.index.dict.vals[index])
-	end
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return @inbounds last(gettokenvalue(a.index.dict, token))
+    else
+        return nothing
+    end
 end
 
 function Base.filter(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return empty(a)
-	else
-		return @inbounds parent(a)[a.index.dict.vals[index]]
-	end
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return @inbounds parent(a)[(gettokenvalue(a.index.dict, token))]
+    else
+        return empty(a)
+    end
 end
 
 # TODO: filter for arbitrary predicates by just checking each unique key? (Sometimes faster, sometimes slower?)
 
 function Base.unique(a::AcceleratedArray{T, <:Any, <:Any, <:HashIndex}) where {T}
-	out = Vector{T}()
-	@inbounds for value in keys(a.index.dict)
-		push!(out, value)
-	end
-	return out
+    out = Vector{T}()
+    @inbounds for value in keys(a.index.dict)
+        push!(out, value)
+    end
+    return out
 end
 
-function SplitApplyCombine.group2(a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}, b::AbstractArray)
-    return Dict((key, @inbounds b[inds]) for (key, inds) in a.index.dict)
+function SplitApplyCombine.group(a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}, b::AbstractArray)
+    return map(inds -> @inbounds(b[inds]), a.index.dict)
 end
 
-function SplitApplyCombine.groupreduce(::typeof(identity), f, op, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}; kw...)
-	return Dict((k, mapreduce(i -> f(@inbounds a[i]), op, v; kw...)) for (k,v) in a.index.dict)
+function SplitApplyCombine.groupreduce(op::Callable, a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}; kw...)
+    return map(inds -> @inbounds(reduce(op, view(a, inds); kw...)), a.index.dict)
 end
 
-function SplitApplyCombine._groupinds(a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
-	return a.index.dict
+function SplitApplyCombine.groupfind(a::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex})
+    return a.index.dict
 end
 
 function SplitApplyCombine._innerjoin!(out, left::AbstractArray, right::AcceleratedArray{<:Any, <:Any, <:Any, <:HashIndex}, v::AbstractArray, ::typeof(isequal))
-    @boundscheck if (axes(l)..., axes(r)...) != axes(v)
+    @boundscheck if (axes(left)..., axes(right)...) != axes(v)
         throw(DimensionMismatch("innerjoin arrays do not have matching dimensions"))
     end
 
     dict = right.index.dict
 
-    @inbounds for i ∈ keys(left)
-        dict_index = Base.ht_keyindex(dict, left(i_l))
-        if dict_index > 0 # -1 if key not found
-            for i_r ∈ dict.vals[dict_index]
+    @inbounds for i_l ∈ keys(left)
+        (hasindex, token) = gettoken(right.index.dict, @inbounds left[i_l])
+        if hasindex
+            for i_r ∈ gettokenvalue(dict, token)
                 push!(out, v[Tuple(i_l)..., Tuple(i_r)...])
             end
         end
@@ -116,13 +116,13 @@ function SplitApplyCombine.leftgroupjoin(lkey, ::typeof(identity), f, ::typeof(i
     K = promote_op(lkey, eltype(left))
 
     dict = right.index.dict
-    out = Dict{K, Vector{T}}()
+    out = HashDictionary{K, Vector{T}}()
     for a ∈ left
         key = lkey(a)
         group = get!(() -> T[], out, key)
-        dict_index = Base.ht_keyindex(dict, key)
-        if dict_index > 0 # -1 if key not found
-            for b ∈ dict.vals[dict_index]
+        (hasindex, token) = gettoken(dict, key)
+        if hasindex
+            for b ∈ @inbounds gettokenvalue(dict, token)
                 push!(group, f(a, b))
             end
         end
diff --git a/src/UniqueHashIndex.jl b/src/UniqueHashIndex.jl
@@ -1,19 +1,19 @@
 # Hash table acceleration index
-struct UniqueHashIndex{D <: Dict} <: AbstractUniqueIndex
+struct UniqueHashIndex{D <: HashDictionary} <: AbstractUniqueIndex
     dict::D
 end 
 
 function UniqueHashIndex(a::AbstractArray)
-	dict = Dict{eltype(a), SingleVector{eltype(keys(a))}}()
+    dict = HashDictionary{eltype(a), SingleVector{eltype(keys(a))}}()
 
     @inbounds for i in keys(a)
         value = a[i]
-        index = Base.ht_keyindex2!(dict, value)
-	    if index > 0 # `value` found in `dict`
-	        error("Input not unique") # TODO Use appropriate Exception
-	    else # `value` is ready to be inserted into `dict` at slot `-index`
-	        @inbounds Base._setindex!(dict, SingleVector(i), value, -index)
-	    end
+        (hadindex, token) = gettoken!(dict, value)
+        if hadindex
+            error("Input not unique") # TODO Use appropriate Exception
+        else # `value` is new: store its index in the slot identified by `token`
+            @inbounds settokenvalue!(dict, token, SingleVector(i))
+        end
     end
     return UniqueHashIndex(dict)
 end
@@ -24,55 +24,53 @@ Base.summary(::UniqueHashIndex) = "UniqueHashIndex"
 Base.in(x, a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex}) = haskey(a.index.dict, x)
 
 function Base.count(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex})
-    index = Base.ht_keyindex(a.index.dict, f.x)
-    if index < 0
-        return 0
-    else
+    if f.x in keys(a.index.dict)
         return 1
+    else
+        return 0
     end
 end
 
 function Base.findall(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return MaybeVector{eltype(keys(a))}()
-	else
-		return MaybeVector(@inbounds a.index.dict.vals[index][])
-	end
+    (hasindex, token) = gettoken(a.index.dict, f.x)
+    if hasindex
+        return MaybeVector(@inbounds gettokenvalue(a.index.dict, token)[])
+    else
+        return MaybeVector{eltype(keys(a))}()
+    end
 end
 
 function Base.filter(f::Fix2{typeof(isequal)}, a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex})
-	index = Base.ht_keyindex(a.index.dict, f.x)
-	if index < 0
-		return MaybeVector{eltype(a)}()
-	else
-		return MaybeVector{eltype(a)}(f.x)
-	end
+    if f.x in keys(a.index.dict)
+        return MaybeVector{eltype(a)}(f.x)
+    else
+        return MaybeVector{eltype(a)}()
+    end
 end
 
-function SplitApplyCombine.group2(a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex}, b::AbstractArray)
-    return Dict((key, SingleVector(@inbounds b[inds[]])) for (key, inds) in a.index.dict)
+function SplitApplyCombine.group(a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex}, b::AbstractArray)
+    return map(inds -> SingleVector(@inbounds b[inds[]]), a.index.dict)
 end
 
 function SplitApplyCombine.groupreduce(::typeof(identity), f, op, a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex}; kw...)
-	return Dict((k, mapreduce(i -> f(@inbounds a[i]), op, v; kw...)) for (k, v) in a.index.dict)
+    return map(inds -> @inbounds(reduce(op, a[inds[]]; kw...)), a.index.dict)
 end
 
-function SplitApplyCombine._groupinds(a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex})
-	return a.index.dict
+function SplitApplyCombine.groupfind(a::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex})
+    return a.index.dict
 end
 
 function SplitApplyCombine._innerjoin!(out, left::AbstractArray, right::AcceleratedArray{<:Any, <:Any, <:Any, <:UniqueHashIndex}, v::AbstractArray, ::typeof(isequal))
-    @boundscheck if (axes(l)..., axes(r)...) != axes(v)
+    @boundscheck if (axes(left)..., axes(right)...) != axes(v)
         throw(DimensionMismatch("innerjoin arrays do not have matching dimensions"))
     end
 
     dict = right.index.dict
 
-    @inbounds for i ∈ keys(left)
-        dict_indUniqueex = Base.ht_keyindex(dict, left(i_l))
-        if dict_index > 0 # -1 if key not found
-            i_r = dict.vals[dict_index][]
+    @inbounds for i_l ∈ keys(left)
+        (hasindex, token) = gettoken(dict, left[i_l])
+        if hasindex
+            i_r = gettokenvalue(dict, token)[]
             push!(out, v[Tuple(i_l)..., Tuple(i_r)...])
         end
     end
diff --git a/test/HashIndex.jl b/test/HashIndex.jl
@@ -18,7 +18,7 @@
    @test issetequal(unique(b), [1,2,3])
 
    @test group(identity, b) == group(identity, a)
-   @test groupinds(identity, b) == groupinds(identity, a)
+   @test groupfind(identity, b) == groupfind(identity, a)
    @test groupreduce(identity, +, b) == groupreduce(identity, +, a)
 
    @test issetequal(innerjoin(identity, identity, tuple, isequal, b, [0, 1, 2]),
diff --git a/test/UniqueHashIndex.jl b/test/UniqueHashIndex.jl
@@ -19,7 +19,7 @@
     @test unique(b) === b
 
     @test group(iseven, b) == group(iseven, a)
-    @test groupinds(iseven, b) == groupinds(iseven, a)
+    @test groupfind(iseven, b) == groupfind(iseven, a)
     @test groupreduce(iseven, +, b) == groupreduce(iseven, +, a)
 
     @test issetequal(innerjoin(identity, identity, tuple, isequal, b, [0, 1, 2]),
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,8 +1,9 @@
 using Test
 using AcceleratedArrays
 using SplitApplyCombine
+using Dictionaries
 
-@test isempty(detect_ambiguities(Base, AcceleratedArrays))
+@test isempty(setdiff(detect_ambiguities(Base, AcceleratedArrays, Dictionaries), detect_ambiguities(Base, Dictionaries) ))
 
 @testset "AcceleratedArrays" begin
     include("Interval.jl")