@@ -52,37 +52,51 @@ def random_check(self):
5252 print ("Random number generation check end" )
5353 print ()
5454
55- def get_rand_matrix (self , dtype , scale , offset ):
56- """Returns a diagonal matrix of shape (size, size), values between 0.999 and 1.111"""
55+ def get_rand_matrix (self , scale , offset ):
56+ """Return random matrices of shape (size, size) with elements within given ranges.
57+
58+ Returns two identical float64 matrices, one on the CPU and one on
59+ self.device.
60+
61+ Each matrix is of shape (size, size) containing values chosen uniformly at random
62+ within the interval [offset, offset + scale).
63+
64+ The first matrix will be on the CPU, the second will be on self.device.
65+
66+ Both matrices are in float64 format; casting to a lower precision is done
67+ later by the matrix_multiple_* methods.
68+
69+ Arguments:
70+ scale: the width of the interval to draw random values from.
71+ offset: the start of the interval to draw random values from.
72+
73+ Returns: (result_cpu, result_device): the same matrix on the CPU and on self.device respectively.
74+ """
5775 # Perform random generation on the CPU to avoid impacts from discrepancies between GPUs
58- result_cpu = (2.0 * ( torch .rand ((self .size , self .size ), dtype = torch .float64 , device = "cpu" ) - 1 ) * scale ) + offset
76+ result_cpu = (torch .rand ((self .size , self .size ), dtype = torch .float64 , device = "cpu" ) * scale ) + offset
5977 result_device = result_cpu .detach ().clone ()
6078 result_device = result_device .to (self .device )
61- #value = result[0,0].item()
6279 return result_cpu , result_device
6380
64- def matrix_multiple_mad (self , multiplier , accumulator , iterations , dtype , device ):
81+ def matrix_multiple_mad (self , result , multiplier , accumulator , iterations , dtype , device ):
6582 """Perform matrix multiplication-and-adds at different precisions"""
6683 multiplier_cast = multiplier .type (dtype )
6784 accumulator_cast = accumulator .type (dtype )
68- result = torch .eye (self .size , dtype = dtype , device = device )
6985 for count in range (iterations ):
7086 result = torch .mm (result , multiplier_cast )
7187 result = torch .add (result , accumulator_cast )
7288 return result
7389
74- def matrix_multiple_mul (self , multiplier , _ , iterations , dtype , device ):
90+ def matrix_multiple_mul (self , result , multiplier , _ , iterations , dtype , device ):
7591 """Perform matrix multiplications at different precisions"""
7692 multiplier_cast = multiplier .type (dtype )
77- result = torch .eye (self .size , dtype = dtype , device = device )
7893 for count in range (iterations ):
7994 result = torch .mm (result , multiplier_cast )
8095 return result
8196
82- def matrix_multiple_add (self , _ , accumulator , iterations , dtype , device ):
97+ def matrix_multiple_add (self , result , _ , accumulator , iterations , dtype , device ):
8398 """Perform matrix additions at different precisions"""
8499 accumulator_cast = accumulator .type (dtype )
85- result = torch .zeros (self .size , dtype = dtype , device = device )
86100 for count in range (iterations ):
87101 result = torch .add (result , accumulator_cast )
88102 return result
@@ -158,19 +172,28 @@ def generate_data(self, filename, matrix_operation, fuzz, name):
158172 steps_list = []
159173 self .reset_datalines ()
160174 mse_lists = [[] for _ in range (len (self .precisions ))]
175+ double_steps = 128
176+ scale_add = 1 / (2 ** 20 )
177+ scale_mul = (2 ** (1 / double_steps )) / self .size
161178
162- multiplier_cpu , multiplier_device = self .get_rand_matrix (torch .float64 , 0.1 / self .size , 1.0 / self .size )
163- accumulator_cpu , accumulator_device = self .get_rand_matrix (torch .float64 , 0.1 , 0.0 )
164- for steps in range (16 * self .scalefactor , 513 * self .scalefactor , 16 * self .scalefactor ):
165- if fuzz :
166- multiplier_cpu , multiplier_device = self .get_rand_matrix (torch .float64 , 0.1 / self .size , 1.0 / self .size )
167- accumulator_cpu , accumulator_device = self .get_rand_matrix (torch .float64 , 0.1 , 0.0 )
168- steps_list .append (steps )
169- result_cpu = matrix_operation (multiplier_cpu , accumulator_cpu , steps , torch .float64 , "cpu" )
170- for pos , precision in enumerate (self .precisions ):
171- result = matrix_operation (multiplier_device , accumulator_device , steps , precision , self .device )
179+ multiplier_cpu , multiplier_device = self .get_rand_matrix (0.002 * scale_mul , 0.999 * scale_mul )
180+ accumulator_cpu , accumulator_device = self .get_rand_matrix (0.002 * scale_add , 0.999 * scale_add )
181+ for pos , precision in enumerate (self .precisions ):
182+ result_cpu = torch .eye (self .size , dtype = torch .float64 , device = "cpu" )
183+ result = torch .eye (self .size , dtype = precision , device = self .device )
184+ prev = 0
185+ for steps in range (16 * self .scalefactor , 513 * self .scalefactor , 16 * self .scalefactor ):
186+ if fuzz :
187+ multiplier_cpu , multiplier_device = self .get_rand_matrix (0.002 * scale_mul , 0.999 * scale_mul )
188+ accumulator_cpu , accumulator_device = self .get_rand_matrix (0.002 * scale_add , 0.999 * scale_add )
189+ if pos == 0 :
190+ steps_list .append (steps )
191+ result_cpu = matrix_operation (result_cpu , multiplier_cpu , accumulator_cpu , steps - prev , torch .float64 , "cpu" )
192+ result = matrix_operation (result , multiplier_device , accumulator_device , steps - prev , precision , self .device )
193+ value = result_cpu [0 ,0 ].item ()
172194 mse = self .matrix_compare (result_cpu , result .cpu ())
173195 mse_lists [pos ].append (mse .cpu ())
196+ prev = steps
174197
175198 self .steps = steps_list
176199 for pos , precision in enumerate (self .precisions ):
@@ -207,15 +230,19 @@ def main():
207230 parser .add_argument ("--fileout" , "-o" , type = str , required = True , help = "Filename to output the data to" )
208231 args = parser .parse_args ()
209232
210- calc = Calculations (3 , 10 , 42 , args .accelerator , args .prefix )
233+ calc = Calculations (16 , 10 , 42 , args .accelerator , args .prefix )
211234 calc .random_check ()
212235
213- calc = Calculations (3 , 10 , 42 , args .accelerator , args .prefix )
214- fileout = calc .suffix_path (args .fileout , "-mad-3x3 " )
236+ calc = Calculations (16 , 1 , 42 , args .accelerator , args .prefix )
237+ fileout = calc .suffix_path (args .fileout , "-mad-16x16 " )
215238 calc .generate_data_mad (fileout )
216239
217240 calc = Calculations (1024 , 1 , 42 , args .accelerator , args .prefix )
218- fileout = calc .suffix_path (args .fileout , "-mad-1024x1024" )
241+ fileout = calc .suffix_path (args .fileout , "-mad-1024x1024-s42" )
242+ calc .generate_data_mad (fileout )
243+
244+ calc = Calculations (1024 , 1 , 43 , args .accelerator , args .prefix )
245+ fileout = calc .suffix_path (args .fileout , "-mad-1024x1024-s43" )
219246 calc .generate_data_mad (fileout )
220247
221248 calc = Calculations (1024 , 1 , 42 , args .accelerator , args .prefix )
@@ -230,10 +257,5 @@ def main():
230257 fileout = calc .suffix_path (args .fileout , "-mad-fuzz-1024x1024" )
231258 calc .generate_data_mad_fuzz (fileout )
232259
233- for seed in range (42 , 52 ):
234- calc = Calculations (1024 , 1 , seed , args .accelerator , args .prefix )
235- fileout = calc .index_path (args .fileout , seed )
236- calc .generate_data_mad (fileout )
237-
238260if __name__ == "__main__" :
239261 main ()
0 commit comments