@@ -125,43 +125,43 @@ end
125125rng = Xoshiro (42 );
126126rngs = Tuple (Xoshiro (rand (rng, 1 : 10000 )) for _ in 1 : Threads. nthreads ());
127127
128- a = collect (1 : 10 ^ 7 );
128+ a = collect (1 : 10 ^ 8 );
129129wsa = Float64 .(a);
130130
131131times_other_parallel = Float64[]
132- for i in 0 : 6
133- b = @benchmark sample_parallel_2_pass ($ rngs, $ a, $ wsa, 10 ^ $ i)
132+ for i in 0 : 7
133+ b = @benchmark sample_parallel_2_pass ($ rngs, $ a, $ wsa, 10 ^ $ i) seconds = 20
134134 push! (times_other_parallel, median (b. times)/ 10 ^ 6 )
135135 println (" other $(10 ^ i) : $(median (b. times)/ 10 ^ 6 ) ms" )
136136end
137137
138138times_other = Float64[]
139- for i in 0 : 6
140- b = @benchmark sample ($ rng, $ a, Weights ($ wsa), 10 ^ $ i; replace = true )
139+ for i in 0 : 7
140+ b = @benchmark sample ($ rng, $ a, Weights ($ wsa), 10 ^ $ i; replace = true ) seconds = 20
141141 push! (times_other, median (b. times)/ 10 ^ 6 )
142142 println (" other $(10 ^ i) : $(median (b. times)/ 10 ^ 6 ) ms" )
143143end
144144
145145# # single thread
146146times_single_thread = Float64[]
147- for i in 0 : 6
148- b = @benchmark weighted_reservoir_sample ($ rng, $ a, $ wsa, 10 ^ $ i)
147+ for i in 0 : 7
148+ b = @benchmark weighted_reservoir_sample ($ rng, $ a, $ wsa, 10 ^ $ i) seconds = 20
149149 push! (times_single_thread, median (b. times)/ 10 ^ 6 )
150150 println (" sequential $(10 ^ i) : $(median (b. times)/ 10 ^ 6 ) ms" )
151151end
152152
153153# multi thread 1 pass - 6 threads
154154times_multi_thread = Float64[]
155- for i in 0 : 6
156- b = @benchmark weighted_reservoir_sample_parallel_1_pass ($ rngs, $ a, $ wsa, 10 ^ $ i)
155+ for i in 0 : 7
156+ b = @benchmark weighted_reservoir_sample_parallel_1_pass ($ rngs, $ a, $ wsa, 10 ^ $ i) seconds = 20
157157 push! (times_multi_thread, median (b. times)/ 10 ^ 6 )
158158 println (" parallel $(10 ^ i) : $(median (b. times)/ 10 ^ 6 ) ms" )
159159end
160160
161161# multi thread 2 pass - 6 threads
162162times_multi_thread_2 = Float64[]
163- for i in 0 : 6
164- b = @benchmark weighted_reservoir_sample_parallel_2_pass ($ rngs, $ a, $ wsa, 10 ^ $ i)
163+ for i in 0 : 7
164+ b = @benchmark weighted_reservoir_sample_parallel_2_pass ($ rngs, $ a, $ wsa, 10 ^ $ i) seconds = 20
165165 push! (times_multi_thread_2, median (b. times)/ 10 ^ 6 )
166166 println (" parallel $(10 ^ i) : $(median (b. times)/ 10 ^ 6 ) ms" )
167167end
@@ -170,13 +170,13 @@ py"""
170170import numpy as np
171171import timeit
172172
173- a = np.arange(1 , 10 ** 7 + 1 , dtype = np.int64);
174- wsa = np.arange(1 , 10 ** 7 + 1 , dtype = np.float64)
173+ a = np.arange(1 , 10 ** 8 + 1 , dtype = np.int64);
174+ wsa = np.arange(1 , 10 ** 8 + 1 , dtype = np.float64)
175175p = wsa/ np.sum(wsa);
176176
177177def sample_times_numpy ():
178178 times_numpy = []
179- for i in range (7 ):
179+ for i in range (8 ):
180180 ts = []
181181 for j in range (11 ):
182182 t = timeit.timeit(" np.random.choice(a, size=10**i, replace=True, p=p)" ,
@@ -196,20 +196,20 @@ ax1 = Axis(f[1, 1], yscale=log10, xscale=log10,
196196 yminorticksvisible = true , yminorgridvisible = true ,
197197 yminorticks = IntervalsBetween (10 ))
198198
199- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_numpy[2 : end ], label = " numpy.choice sequential" , marker = :circle , markersize = 12 , linestyle = :dot )
200- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_other[2 : end ], label = " StatsBase.sample sequential" , marker = :rect , markersize = 12 , linestyle = :dot )
201- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_other_parallel[2 : end ], label = " StatsBase.sample parallel (2 passes)" , marker = :diamond , markersize = 12 , linestyle = :dot )
202- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_single_thread[2 : end ], label = " WRSWR-SKIP sequential" , marker = :hexagon , markersize = 12 , linestyle = :dot )
203- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_multi_thread[2 : end ], label = " WRSWR-SKIP parallel (1 pass)" , marker = :cross , markersize = 12 , linestyle = :dot )
204- scatterlines! (ax1, [10 ^ i/ 10 ^ 7 for i in 1 : 6 ], times_multi_thread_2[2 : end ], label = " WRSWR-SKIP parallel (2 passes)" , marker = :xcross , markersize = 12 , linestyle = :dot )
205- Legend (f[1 , 2 ], ax1, labelsize= 10 , framevisible = false )
199+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_numpy[3 : end ], label = " numpy.choice sequential" , marker = :circle , markersize = 12 , linestyle = :dot )
200+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_other[3 : end ], label = " StatsBase.sample sequential" , marker = :rect , markersize = 12 , linestyle = :dot )
201+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_other_parallel[3 : end ], label = " StatsBase.sample parallel (2 passes)" , marker = :diamond , markersize = 12 , linestyle = :dot )
202+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_single_thread[3 : end ], label = " WRSWR-SKIP sequential" , marker = :hexagon , markersize = 12 , linestyle = :dot )
203+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_multi_thread[3 : end ], label = " WRSWR-SKIP parallel (1 pass)" , marker = :cross , markersize = 12 , linestyle = :dot )
204+ scatterlines! (ax1, [10 ^ i/ 10 ^ 8 for i in 2 : 7 ], times_multi_thread_2[3 : end ], label = " WRSWR-SKIP parallel (2 passes)" , marker = :xcross , markersize = 12 , linestyle = :dot )
205+ Legend (f[2 , 1 ], ax1, labelsize= 10 , framevisible = false , orientation = :horizontal )
206206
207207ax1. xtickformat = x -> string .(round .(x.* 100 , digits= 10 )) .* " %"
208208ax1. title = " Comparison between weighted sampling algorithms in a non-streaming context"
209- ax1. xticks = [10 ^ (i)/ 10 ^ 7 for i in 1 : 6 ]
209+ ax1. xticks = [10 ^ (i)/ 10 ^ 8 for i in 2 : 7 ]
210210
211211ax1. xlabel = " sample ratio"
212212ax1. ylabel = " time (ms)"
213213
214214f
215- save (" comparison_WRSWR_SKIP_alg .png" , f)
215+ save (" comparison_WRSWR_SKIP_alg_no_stream .png" , f)
0 commit comments