Skip to content

Commit 1e834b8

Browse files
llewelld authored and Iain-S committed
Adjust scaling and improve efficiency
Adjusts the scaling so that the scale of elements in the matrices doubles every 128 steps (every 8 datapoints). Improves efficiency by not recalculating steps multiple times. This greatly reduces the time needed to run all of the tests.
1 parent c167bcc commit 1e834b8

File tree

5 files changed

+63
-40
lines changed

5 files changed

+63
-40
lines changed

calculations/batch/bask-local-calcs.sh renamed to calculations/batch/bask-srun-calcs.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
# vim: et:ts=4:sts=4:sw=4
33

44
# Execute using:
5-
# ./bask-local-calcs.sh
5+
# srun --qos turing --account usjs9456-ati-test --time 1:00:00 --nodes 1 --gpus 1 --cpus-per-gpu 36 --mem 16384 --pty /bin/bash
6+
# ./bask-srun-calcs.sh
67

78
echo "## Aurora calculation testing script starting"
89

calculations/batch/bask-local-plot.sh renamed to calculations/batch/bask-srun-plot.sh

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
# vim: et:ts=4:sts=4:sw=4
33

44
# Execute using:
5-
# ./bask-local-calcs.sh
5+
# srun --qos turing --account usjs9456-ati-test --time 1:00:00 --nodes 1 --gpus 1 --cpus-per-gpu 36 --mem 16384 --pty /bin/bash
6+
# ./bask-srun-plot.sh
67

78
echo "## Aurora calculation testing script starting"
89

@@ -34,18 +35,18 @@ pip install --quiet typing_extensions==4.14.1
3435
echo "## Plotting graphs"
3536

3637
# Render some graphs - GPU plots
37-
python plot.py -x "Multiply-and-Add" -o "dev-mad-1024x1024-0042" -i "calcs-dawn-xpu-0042.csv" -i "calcs-bask-gpu-0042.csv"
38-
python plot.py -x "Multiply-and-Add" -o "dev-mad-1024x1024-0043" -i "calcs-dawn-xpu-0043.csv" -i "calcs-bask-gpu-0043.csv"
38+
python plot.py -x "Multiply-and-Add" -o "dev-mad-1024x1024-s42" -i "calcs-dawn-xpu-mad-1024x1024-s42.csv" -i "calcs-bask-gpu-mad-1024x1024-s42.csv"
39+
python plot.py -x "Multiply-and-Add" -o "dev-mad-1024x1024-s43" -i "calcs-dawn-xpu-mad-1024x1024-s43.csv" -i "calcs-bask-gpu-mad-1024x1024-s43.csv"
3940
python plot.py -x "Fuzzed Multiply-and-Add" -o "dev-mad-fuzz-1024x1024" -i "calcs-dawn-xpu-mad-fuzz-1024x1024.csv" -i "calcs-bask-gpu-mad-fuzz-1024x1024.csv"
40-
python plot.py -x "Multiply-and-Add" -o "dev-mad-3x3" -i "calcs-dawn-xpu-mad-3x3.csv" -i "calcs-bask-gpu-mad-3x3.csv"
41+
python plot.py -x "Multiply-and-Add" -o "dev-mad-16x16" -i "calcs-dawn-xpu-mad-16x16.csv" -i "calcs-bask-gpu-mad-16x16.csv"
4142
python plot.py -x "Multiply" -o "dev-mul-1024x1024" -i "calcs-dawn-xpu-mul-1024x1024.csv" -i "calcs-bask-gpu-mul-1024x1024.csv"
4243
python plot.py -x "Addition" -o "dev-add-1024x1024" -i "calcs-dawn-xpu-add-1024x1024.csv" -i "calcs-bask-gpu-add-1024x1024.csv"
4344

4445
# Render some graphs - CPU plots
45-
python plot.py -x "Multiply-and-Add" -o "cpu-mad-1024x1024-0042" -i "calcs-dawn-cpu-0042.csv" -i "calcs-bask-cpu-0042.csv"
46-
python plot.py -x "Multiply-and-Add" -o "cpu-mad-1024x1024-0043" -i "calcs-dawn-cpu-0043.csv" -i "calcs-bask-cpu-0043.csv"
46+
python plot.py -x "Multiply-and-Add" -o "cpu-mad-1024x1024-s42" -i "calcs-dawn-cpu-mad-1024x1024-s42.csv" -i "calcs-bask-cpu-mad-1024x1024-s42.csv"
47+
python plot.py -x "Multiply-and-Add" -o "cpu-mad-1024x1024-s43" -i "calcs-dawn-cpu-mad-1024x1024-s43.csv" -i "calcs-bask-cpu-mad-1024x1024-s43.csv"
4748
python plot.py -x "Fuzzed Multiply-and-Add" -o "cpu-mad-fuzz-1024x1024" -i "calcs-dawn-cpu-mad-fuzz-1024x1024.csv" -i "calcs-bask-cpu-mad-fuzz-1024x1024.csv"
48-
python plot.py -x "Multiply-and-Add" -o "cpu-mad-3x3" -i "calcs-dawn-cpu-mad-3x3.csv" -i "calcs-bask-cpu-mad-3x3.csv"
49+
python plot.py -x "Multiply-and-Add" -o "cpu-mad-16x16" -i "calcs-dawn-cpu-mad-16x16.csv" -i "calcs-bask-cpu-mad-16x16.csv"
4950
python plot.py -x "Multiply" -o "cpu-mul-1024x1024" -i "calcs-dawn-cpu-mul-1024x1024.csv" -i "calcs-bask-cpu-mul-1024x1024.csv"
5051
python plot.py -x "Addition" -o "cpu-add-1024x1024" -i "calcs-dawn-cpu-add-1024x1024.csv" -i "calcs-bask-cpu-add-1024x1024.csv"
5152

calculations/batch/dawn-local-calcs.sh renamed to calculations/batch/dawn-srun-calcs.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
# vim: et:ts=4:sts=4:sw=4
33

44
# Execute using:
5-
# ./dawn-local-calcs.sh
5+
# srun --account airr-p8-rcpp-dawn-gpu --partition pvc9 --time 1:00:00 --nodes 1 --gres gpu:1 --pty bash
6+
# ./dawn-srun-calcs.sh
67

78
echo "## Aurora calculation testing script starting"
89

calculations/calculate.py

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -52,37 +52,51 @@ def random_check(self):
5252
print("Random number generation check end")
5353
print()
5454

55-
def get_rand_matrix(self, dtype, scale, offset):
56-
"""Returns a diagonal matrix of shape (size, size), values between 0.999 and 1.111"""
55+
def get_rand_matrix(self, scale, offset):
56+
"""Return random matrices of shape (size, size) with elements within given ranges.
57+
58+
Returns two matrices holding identical values, one on the CPU and one on
59+
self.device.
60+
61+
Each matrix is of shape (size, size) containing values chosen uniformly at random
62+
within the interval [offset, offset + scale).
63+
64+
The first matrix will be on the CPU, the second will be on the GPU.
65+
66+
Both matrices are in float64 format; callers cast them to the precision they
67+
require.
68+
69+
Arguments:
70+
scale: the width of the interval to draw random values from.
71+
offset: the start of the interval to draw random values from.
72+
73+
Returns: (result_cpu, result_device): the same matrices on CPU and GPU respectively.
74+
"""
5775
# Perform random generation on the CPU to avoid impacts from discrepancies between GPUs
58-
result_cpu = (2.0 * (torch.rand((self.size, self.size), dtype=torch.float64, device="cpu") - 1) * scale) + offset
76+
result_cpu = (torch.rand((self.size, self.size), dtype=torch.float64, device="cpu") * scale) + offset
5977
result_device = result_cpu.detach().clone()
6078
result_device = result_device.to(self.device)
61-
#value = result[0,0].item()
6279
return result_cpu, result_device
6380

64-
def matrix_multiple_mad(self, multiplier, accumulator, iterations, dtype, device):
81+
def matrix_multiple_mad(self, result, multiplier, accumulator, iterations, dtype, device):
6582
"""Perform matrix multiplication-and-adds at different precisions"""
6683
multiplier_cast = multiplier.type(dtype)
6784
accumulator_cast = accumulator.type(dtype)
68-
result = torch.eye(self.size, dtype=dtype, device=device)
6985
for count in range(iterations):
7086
result = torch.mm(result, multiplier_cast)
7187
result = torch.add(result, accumulator_cast)
7288
return result
7389

74-
def matrix_multiple_mul(self, multiplier, _, iterations, dtype, device):
90+
def matrix_multiple_mul(self, result, multiplier, _, iterations, dtype, device):
7591
"""Perform matrix multiplications at different precisions"""
7692
multiplier_cast = multiplier.type(dtype)
77-
result = torch.eye(self.size, dtype=dtype, device=device)
7893
for count in range(iterations):
7994
result = torch.mm(result, multiplier_cast)
8095
return result
8196

82-
def matrix_multiple_add(self, _, accumulator, iterations, dtype, device):
97+
def matrix_multiple_add(self, result, _, accumulator, iterations, dtype, device):
8398
"""Perform matrix additions at different precisions"""
8499
accumulator_cast = accumulator.type(dtype)
85-
result = torch.zeros(self.size, dtype=dtype, device=device)
86100
for count in range(iterations):
87101
result = torch.add(result, accumulator_cast)
88102
return result
@@ -158,19 +172,28 @@ def generate_data(self, filename, matrix_operation, fuzz, name):
158172
steps_list = []
159173
self.reset_datalines()
160174
mse_lists = [[] for _ in range(len(self.precisions))]
175+
double_steps = 128
176+
scale_add = 1 / (2**20)
177+
scale_mul = (2**(1 / double_steps)) / self.size
161178

162-
multiplier_cpu, multiplier_device = self.get_rand_matrix(torch.float64, 0.1 / self.size, 1.0 / self.size)
163-
accumulator_cpu, accumulator_device = self.get_rand_matrix(torch.float64, 0.1, 0.0)
164-
for steps in range(16 * self.scalefactor, 513 * self.scalefactor, 16 * self.scalefactor):
165-
if fuzz:
166-
multiplier_cpu, multiplier_device = self.get_rand_matrix(torch.float64, 0.1 / self.size, 1.0 / self.size)
167-
accumulator_cpu, accumulator_device = self.get_rand_matrix(torch.float64, 0.1, 0.0)
168-
steps_list.append(steps)
169-
result_cpu = matrix_operation(multiplier_cpu, accumulator_cpu, steps, torch.float64, "cpu")
170-
for pos, precision in enumerate(self.precisions):
171-
result = matrix_operation(multiplier_device, accumulator_device, steps, precision, self.device)
179+
multiplier_cpu, multiplier_device = self.get_rand_matrix(0.002 * scale_mul, 0.999 * scale_mul)
180+
accumulator_cpu, accumulator_device = self.get_rand_matrix(0.002 * scale_add, 0.999 * scale_add)
181+
for pos, precision in enumerate(self.precisions):
182+
result_cpu = torch.eye(self.size, dtype=torch.float64, device="cpu")
183+
result = torch.eye(self.size, dtype=precision, device=self.device)
184+
prev = 0
185+
for steps in range(16 * self.scalefactor, 513 * self.scalefactor, 16 * self.scalefactor):
186+
if fuzz:
187+
multiplier_cpu, multiplier_device = self.get_rand_matrix(0.002 * scale_mul, 0.999 * scale_mul)
188+
accumulator_cpu, accumulator_device = self.get_rand_matrix(0.002 * scale_add, 0.999 * scale_add)
189+
if pos == 0:
190+
steps_list.append(steps)
191+
result_cpu = matrix_operation(result_cpu, multiplier_cpu, accumulator_cpu, steps - prev, torch.float64, "cpu")
192+
result = matrix_operation(result, multiplier_device, accumulator_device, steps - prev, precision, self.device)
193+
value = result_cpu[0,0].item()
172194
mse = self.matrix_compare(result_cpu, result.cpu())
173195
mse_lists[pos].append(mse.cpu())
196+
prev = steps
174197

175198
self.steps = steps_list
176199
for pos, precision in enumerate(self.precisions):
@@ -207,15 +230,19 @@ def main():
207230
parser.add_argument("--fileout", "-o", type=str, required=True, help="Filename to output the data to")
208231
args = parser.parse_args()
209232

210-
calc = Calculations(3, 10, 42, args.accelerator, args.prefix)
233+
calc = Calculations(16, 10, 42, args.accelerator, args.prefix)
211234
calc.random_check()
212235

213-
calc = Calculations(3, 10, 42, args.accelerator, args.prefix)
214-
fileout = calc.suffix_path(args.fileout, "-mad-3x3")
236+
calc = Calculations(16, 1, 42, args.accelerator, args.prefix)
237+
fileout = calc.suffix_path(args.fileout, "-mad-16x16")
215238
calc.generate_data_mad(fileout)
216239

217240
calc = Calculations(1024, 1, 42, args.accelerator, args.prefix)
218-
fileout = calc.suffix_path(args.fileout, "-mad-1024x1024")
241+
fileout = calc.suffix_path(args.fileout, "-mad-1024x1024-s42")
242+
calc.generate_data_mad(fileout)
243+
244+
calc = Calculations(1024, 1, 43, args.accelerator, args.prefix)
245+
fileout = calc.suffix_path(args.fileout, "-mad-1024x1024-s43")
219246
calc.generate_data_mad(fileout)
220247

221248
calc = Calculations(1024, 1, 42, args.accelerator, args.prefix)
@@ -230,10 +257,5 @@ def main():
230257
fileout = calc.suffix_path(args.fileout, "-mad-fuzz-1024x1024")
231258
calc.generate_data_mad_fuzz(fileout)
232259

233-
for seed in range(42, 52):
234-
calc = Calculations(1024, 1, seed, args.accelerator, args.prefix)
235-
fileout = calc.index_path(args.fileout, seed)
236-
calc.generate_data_mad(fileout)
237-
238260
if __name__ == "__main__":
239261
main()

calculations/requirements.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)