@@ -20,6 +20,7 @@ import (
2020 "bytes"
2121 "fmt"
2222 "testing"
23+ "time"
2324
2425 . "github.com/onsi/gomega"
2526 . "github.com/project-codeflare/codeflare-common/support"
@@ -77,11 +78,11 @@ func mnistRay(t *testing.T, numGpus int) {
7778 }
7879 clusterQueue := CreateKueueClusterQueue (test , cqSpec )
7980 defer test .Client ().Kueue ().KueueV1beta1 ().ClusterQueues ().Delete (test .Ctx (), clusterQueue .Name , metav1.DeleteOptions {})
80- localQueue := CreateKueueLocalQueue (test , namespace .Name , clusterQueue .Name )
81+ CreateKueueLocalQueue (test , namespace .Name , clusterQueue .Name , AsDefaultQueue )
8182
8283 // Test configuration
8384 jupyterNotebookConfigMapFileName := "mnist_ray_mini.ipynb"
84- mnist := readMnistPy (test )
85+ mnist := readMnistScriptTemplate (test , "resources/mnist.py" )
8586 if numGpus > 0 {
8687 mnist = bytes .Replace (mnist , []byte ("accelerator=\" has to be specified\" " ), []byte ("accelerator=\" gpu\" " ), 1 )
8788 } else {
@@ -91,7 +92,7 @@ func mnistRay(t *testing.T, numGpus int) {
9192 // MNIST Ray Notebook
9293 jupyterNotebookConfigMapFileName : ReadFile (test , "resources/mnist_ray_mini.ipynb" ),
9394 "mnist.py" : mnist ,
94- "requirements.txt" : readRequirementsTxt (test ),
95+ "requirements.txt" : ReadFile (test , "resources/requirements.txt" ),
9596 })
9697
9798 // Define the regular(non-admin) user
@@ -102,7 +103,7 @@ func mnistRay(t *testing.T, numGpus int) {
102103 CreateUserRoleBindingWithClusterRole (test , userName , namespace .Name , "admin" )
103104
104105 // Create Notebook CR
105- createNotebook (test , namespace , userToken , localQueue . Name , config .Name , jupyterNotebookConfigMapFileName , numGpus )
106+ createNotebook (test , namespace , userToken , config .Name , jupyterNotebookConfigMapFileName , numGpus )
106107
107108 // Gracefully cleanup Notebook
108109 defer func () {
@@ -111,7 +112,7 @@ func mnistRay(t *testing.T, numGpus int) {
111112 }()
112113
113114 // Make sure the RayCluster is created and running
114- test .Eventually (rayClusters (test , namespace ), TestTimeoutLong ).
115+ test .Eventually (RayClusters (test , namespace . Name ), TestTimeoutLong ).
115116 Should (
116117 And (
117118 HaveLen (1 ),
@@ -128,32 +129,53 @@ func mnistRay(t *testing.T, numGpus int) {
128129 ),
129130 )
130131
131- // Make sure the RayCluster finishes and is deleted
132- test .Eventually (rayClusters (test , namespace ), TestTimeoutLong ).
133- Should (HaveLen (0 ))
134- }
132+ time .Sleep (30 * time .Second )
135133
136- func readRequirementsTxt (test Test ) []byte {
137- // Read the requirements.txt from resources and perform replacements for custom values using go template
138- props := struct {
139- PipIndexUrl string
140- PipTrustedHost string
141- }{
142- PipIndexUrl : "--index " + string (GetPipIndexURL ()),
143- }
134+ // Fetch created raycluster
135+ rayClusterName := "mnisttest"
136+ rayCluster , err := test .Client ().Ray ().RayV1 ().RayClusters (namespace .Name ).Get (test .Ctx (), rayClusterName , metav1.GetOptions {})
137+ test .Expect (err ).ToNot (HaveOccurred ())
144138
145- // Provide trusted host only if defined
146- if len (GetPipTrustedHost ()) > 0 {
147- props .PipTrustedHost = "--trusted-host " + GetPipTrustedHost ()
139+ // Initialise raycluster client to interact with raycluster to get rayjob details using REST-API
140+ dashboardUrl := GetDashboardUrl (test , namespace , rayCluster )
141+ rayClusterClientConfig := RayClusterClientConfig {Address : dashboardUrl .String (), Client : nil , InsecureSkipVerify : true }
142+ rayClient , err := NewRayClusterClient (rayClusterClientConfig , test .Config ().BearerToken )
143+ if err != nil {
144+ test .T ().Errorf ("%s" , err )
148145 }
149146
150- template , err := files .ReadFile ("resources/requirements.txt" )
151- test .Expect (err ).NotTo (HaveOccurred ())
147+ jobID := GetTestJobId (test , rayClient , dashboardUrl .Host )
148+ test .Expect (jobID ).ToNot (Equal (nil ))
149+
150+ // Wait for the job to be succeeded or failed
151+ var rayJobStatus string
152+ fmt .Printf ("Waiting for job to be Succeeded...\n " )
153+ test .Eventually (func () string {
154+ resp , err := rayClient .GetJobDetails (jobID )
155+ test .Expect (err ).ToNot (HaveOccurred ())
156+ rayJobStatusVal := resp .Status
157+ if rayJobStatusVal == "SUCCEEDED" || rayJobStatusVal == "FAILED" {
158+ fmt .Printf ("JobStatus : %s\n " , rayJobStatusVal )
159+ rayJobStatus = rayJobStatusVal
160+ return rayJobStatus
161+ }
162+ if rayJobStatus != rayJobStatusVal && rayJobStatusVal != "SUCCEEDED" {
163+ fmt .Printf ("JobStatus : %s...\n " , rayJobStatusVal )
164+ rayJobStatus = rayJobStatusVal
165+ }
166+ return rayJobStatus
167+ }, TestTimeoutDouble , 3 * time .Second ).Should (Or (Equal ("SUCCEEDED" ), Equal ("FAILED" )), "Job did not complete within the expected time" )
168+ test .Expect (rayJobStatus ).To (Equal ("SUCCEEDED" ), "RayJob failed !" )
169+
170+ // Store job logs in output directory
171+ WriteRayJobAPILogs (test , rayClient , jobID )
152172
153- return ParseTemplate (test , template , props )
173+ // Make sure the RayCluster finishes and is deleted
174+ test .Eventually (RayClusters (test , namespace .Name ), TestTimeoutLong ).
175+ Should (HaveLen (0 ))
154176}
155177
156- func readMnistPy (test Test ) []byte {
178+ func readMnistScriptTemplate (test Test , filePath string ) []byte {
157179 // Read the mnist.py from resources and perform replacements for custom values using go template
158180 storage_bucket_endpoint , storage_bucket_endpoint_exists := GetStorageBucketDefaultEndpoint ()
159181 storage_bucket_access_key_id , storage_bucket_access_key_id_exists := GetStorageBucketAccessKeyId ()
@@ -184,7 +206,7 @@ func readMnistPy(test Test) []byte {
184206 StorageBucketMnistDir : storage_bucket_mnist_dir ,
185207 StorageBucketMnistDirExists : storage_bucket_mnist_dir_exists ,
186208 }
187- template , err := files .ReadFile ("resources/mnist.py" )
209+ template , err := files .ReadFile (filePath )
188210 test .Expect (err ).NotTo (HaveOccurred ())
189211
190212 return ParseTemplate (test , template , props )
0 commit comments