@@ -20,7 +20,6 @@ import (
2020 "bytes"
2121 "fmt"
2222 "testing"
23- "time"
2423
2524 kftov1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
2625 . "github.com/onsi/gomega"
@@ -83,27 +82,6 @@ func runKFTOPyTorchMnistJob(t *testing.T, accelerator Accelerator, image string,
8382 test .Eventually (PyTorchJob (test , namespace .Name , tuningJob .Name ), TestTimeoutDouble ).
8483 Should (WithTransform (PyTorchJobConditionRunning , Equal (corev1 .ConditionTrue )))
8584
86- // Verify GPU utilization
87- if IsOpenShift (test ) && accelerator == NVIDIA {
88- trainingPods := GetPods (test , namespace .Name , metav1.ListOptions {LabelSelector : "training.kubeflow.org/job-name=" + tuningJob .GetName ()})
89- test .Expect (trainingPods ).To (HaveLen (workerReplicas + 1 )) // +1 is a master node
90-
91- for _ , trainingPod := range trainingPods {
92- // Check that GPUs for training pods were utilized recently
93- test .Eventually (OpenShiftPrometheusGpuUtil (test , trainingPod , accelerator ), 15 * time .Minute ).
94- Should (
95- And (
96- HaveLen (numProcPerNode ),
97- ContainElement (
98- // Check that at least some GPU was utilized on more than 20%
99- HaveField ("Value" , BeNumerically (">" , 20 )),
100- ),
101- ),
102- )
103- }
104- test .T ().Log ("All GPUs were successfully utilized" )
105- }
106-
10785 // Make sure the PyTorch job succeeded
10886 test .Eventually (PyTorchJob (test , namespace .Name , tuningJob .Name ), TestTimeoutDouble ).Should (WithTransform (PyTorchJobConditionSucceeded , Equal (corev1 .ConditionTrue )))
10987 test .T ().Logf ("PytorchJob %s/%s ran successfully" , tuningJob .Namespace , tuningJob .Name )
0 commit comments