1+ import pkg_resources
2+
13from absl .testing import absltest
24from absl .testing import parameterized
35
@@ -9,6 +11,7 @@ class HorovodTestCase(DataprocTestCase):
911 INIT_ACTIONS = ["horovod/horovod.sh" ]
1012 GPU_INIT_ACTIONS = ["gpu/install_gpu_driver.sh" ] + INIT_ACTIONS
1113 GPU_P100 = "type=nvidia-tesla-p100"
14+ GPU_T4 = "type=nvidia-tesla-t4"
1215
1316 TENSORFLOW_TEST_SCRIPT = "scripts/verify_tensorflow.py"
1417 PYTORCH_TEST_SCRIPT = "scripts/verify_pytorch.py"
@@ -26,6 +29,8 @@ def _submit_spark_job(self, script):
2629 def test_horovod_cpu (self , configuration , controller ):
2730 if self .getImageOs () == 'rocky' :
2831 self .skipTest ("Not supported in Rocky Linux-based images" )
32+ if self .getImageVersion () > pkg_resources .parse_version ("2.0" ):
33+ self .skipTest ("Not supported in Dataproc image version 2.1 and 2.2" )
2934
3035 metadata = ""
3136 if controller == "mpi" :
@@ -44,16 +49,18 @@ def test_horovod_cpu(self, configuration, controller):
4449 def test_horovod_gpu (self , configuration , controller ):
4550 if self .getImageOs () == 'rocky' :
4651 self .skipTest ("Not supported in Rocky Linux-based images" )
52+ if self .getImageVersion () > pkg_resources .parse_version ("2.0" ):
53+ self .skipTest ("Not supported in Dataproc image version 2.1 and 2.2" )
4754
48- metadata = "cuda-version=11.1 ,cudnn-version=8.0.5.39 ,gpu-driver-provider=NVIDIA"
55+ metadata = "cuda-version=12.4 ,cudnn-version=9.1.0.70 ,gpu-driver-provider=NVIDIA"
4956
5057 self .createCluster (
5158 configuration ,
5259 self .GPU_INIT_ACTIONS ,
5360 timeout_in_minutes = 60 ,
5461 machine_type = "n1-standard-8" ,
55- master_accelerator = self .GPU_P100 ,
56- worker_accelerator = self .GPU_P100 ,
62+ master_accelerator = self .GPU_T4 ,
63+ worker_accelerator = self .GPU_T4 ,
5764 metadata = metadata )
5865
5966
0 commit comments