Skip to content

Commit 7f8cb7e

Browse files
Skip GPU usage validation for kfto pytorch multi-node tests
1 parent 5b1258f commit 7f8cb7e

File tree

1 file changed

+0 -22 lines changed (0 additions, 22 deletions)

1 file changed

+0 -22 lines changed (0 additions, 22 deletions)

tests/kfto/kfto_mnist_training_test.go

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ import (
2020
"bytes"
2121
"fmt"
2222
"testing"
23-
"time"
2423

2524
kftov1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
2625
. "github.com/onsi/gomega"
@@ -83,27 +82,6 @@ func runKFTOPyTorchMnistJob(t *testing.T, accelerator Accelerator, image string,
8382
test.Eventually(PyTorchJob(test, namespace.Name, tuningJob.Name), TestTimeoutDouble).
8483
Should(WithTransform(PyTorchJobConditionRunning, Equal(corev1.ConditionTrue)))
8584

86-
// Verify GPU utilization
87-
if IsOpenShift(test) && accelerator == NVIDIA {
88-
trainingPods := GetPods(test, namespace.Name, metav1.ListOptions{LabelSelector: "training.kubeflow.org/job-name=" + tuningJob.GetName()})
89-
test.Expect(trainingPods).To(HaveLen(workerReplicas + 1)) // +1 is a master node
90-
91-
for _, trainingPod := range trainingPods {
92-
// Check that GPUs for training pods were utilized recently
93-
test.Eventually(OpenShiftPrometheusGpuUtil(test, trainingPod, accelerator), 15*time.Minute).
94-
Should(
95-
And(
96-
HaveLen(numProcPerNode),
97-
ContainElement(
98-
// Check that at least some GPU was utilized on more than 20%
99-
HaveField("Value", BeNumerically(">", 20)),
100-
),
101-
),
102-
)
103-
}
104-
test.T().Log("All GPUs were successfully utilized")
105-
}
106-
10785
// Make sure the PyTorch job succeeded
10886
test.Eventually(PyTorchJob(test, namespace.Name, tuningJob.Name), TestTimeoutDouble).Should(WithTransform(PyTorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
10987
test.T().Logf("PytorchJob %s/%s ran successfully", tuningJob.Namespace, tuningJob.Name)

0 commit comments

Comments
 (0)