#!/usr/bin/env bash
22
33# Copyright 2024 The Kubernetes Authors.
44#
@@ -84,6 +84,49 @@ function gpu-partition-count-from-logs {
8484 echo " $logs " | sed -nE " s/^declare -x GPU_DEVICE_${id} _PARTITION_COUNT=\" (.+)\" $/\1/p"
8585}
8686
# Verifies that a ResourceClaim in the given namespace publishes device data.
#
# Polls (up to 30 attempts, one second apart) for the name of the first
# ResourceClaim listed in the namespace, then reads two fields from
# .status.devices[0].data on that claim and requires both to be non-empty.
#
# Arguments: $1 - namespace to inspect
# Outputs:   progress and error messages on stdout; when a field is missing
#            the claim is additionally dumped as YAML
# Exits:     terminates the script with status 1 if no claim shows up or if
#            either field is empty
function verify-resourceclaim-device-status {
  local ns="$1"
  echo "=== Verifying ResourceClaim device data in namespace ${ns} ==="

  local claim=""
  local attempt # local so the poll counter does not leak into the caller
  for attempt in {1..30}; do
    # Lookup failures are swallowed on purpose so that polling continues
    # until a claim name is returned or the attempts run out.
    claim="$(kubectl get resourceclaim -n "${ns}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
    if [[ -n "${claim}" ]]; then
      break
    fi
    sleep 1
  done

  if [[ -z "${claim}" ]]; then
    echo "ERROR: no ResourceClaim found in namespace ${ns}"
    exit 1
  fi

  echo "Found ResourceClaim ${ns}/${claim}, checking status.devices[0].data ..."

  # Declaration and assignment are kept separate so that a kubectl failure
  # is not masked by the exit status of 'local'.
  local uuid
  uuid="$(kubectl get resourceclaim "${claim}" -n "${ns}" \
    -o jsonpath='{.status.devices[0].data.uuid.string}')"

  local driver_version
  driver_version="$(kubectl get resourceclaim "${claim}" -n "${ns}" \
    -o jsonpath='{.status.devices[0].data.driverVersion.version}')"

  if [[ -z "${uuid}" ]]; then
    echo "ERROR: ResourceClaim ${ns}/${claim} is missing .status.devices[0].data.uuid.string"
    # Dump the whole object to make the failure diagnosable from the log.
    kubectl get resourceclaim "${claim}" -n "${ns}" -o yaml
    exit 1
  fi

  if [[ -z "${driver_version}" ]]; then
    echo "ERROR: ResourceClaim ${ns}/${claim} is missing .status.devices[0].data.driverVersion.version"
    kubectl get resourceclaim "${claim}" -n "${ns}" -o yaml
    exit 1
  fi

  echo "OK: ResourceClaim ${ns}/${claim} has device data (uuid=${uuid}, driverVersion=${driver_version})"
}
129+
87130declare -a observed_gpus
88131function gpu-already-seen {
89132 local gpu=" $1 "
@@ -101,6 +144,9 @@ if [ $gpu_test_1 != 2 ]; then
101144 exit 1
102145fi
103146
# Verify that at least one ResourceClaim in gpu-test1 has device data
verify-resourceclaim-device-status "gpu-test1"

# Capture the log of container ctr0 in pod0, hand it to gpus-from-logs, and
# count the whitespace-separated words in the result. 'tr' strips any padding
# spaces that wc puts around the number.
gpu_test1_pod0_ctr0_logs=$(kubectl logs -n gpu-test1 pod0 -c ctr0)
gpu_test1_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod0_ctr0_logs")
gpu_test1_pod0_ctr0_gpus_count=$(echo "$gpu_test1_pod0_ctr0_gpus" | wc -w | tr -d ' ')
0 commit comments