
Commit 0749895

Merge pull request #192 from t-mialve/t-mialve/fix-tests
Fix instability in long haul tests
2 parents: aac2d50 + 71b5655 · commit 0749895

15 files changed (+42, -31 lines)


deploy/example/echodate/deployment.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ spec:
     - ReadWriteMany
   resources:
     requests:
-      storage: 1Gi
+      storage: 4Ti
   storageClassName: sc.azurelustre.csi.azure.com
 ---
 apiVersion: apps/v1
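The example claim now asks for 4Ti instead of 1Gi, which puts it inside the 4Ti–48Ti SupportedSizeRange declared in test/external-e2e/testdriver-azurelustre.yaml further down; the PVC examples under docs/examples receive the same 4Ti request. A quick, hedged way to confirm what a claim like this actually binds to (the PVC name is a placeholder, not taken from the manifest):

# Placeholder name; substitute the claim created by the example manifest.
kubectl get pvc <pvc-name> -o jsonpath='{.status.phase} {.status.capacity.storage}'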

docs/examples/pv.yaml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ spec:
   capacity:
     # This field should be the true size of the Azure Lustre you want
     # to used. So that, k8s can allocate resources better.
-    storage: 4Ti
+    storage: 48Ti
   csi:
     driver: azurelustre.csi.azure.com
     volumeAttributes:

docs/examples/pv_subdir.yaml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ spec:
   capacity:
     # This field should be the true size of the Azure Lustre you want
     # to used. So that, k8s can allocate resources better.
-    storage: 4Ti
+    storage: 48Ti
   csi:
     driver: azurelustre.csi.azure.com
     volumeAttributes:
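Both static PV examples (pv.yaml and pv_subdir.yaml) now advertise 48Ti, matching the raised Max in the testdriver manifest below; as the inline comment says, this field should reflect the true size of the Azure Lustre file system so Kubernetes can account for capacity correctly. A hedged spot-check of what a pre-provisioned PV reports (the PV name is a placeholder):

# Placeholder name; this is the capacity the scheduler and quota logic will see.
kubectl get pv <pv-name> -o jsonpath='{.spec.capacity.storage}'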

docs/examples/pvc_storageclass.yaml

Lines changed: 1 addition & 1 deletion
@@ -10,6 +10,6 @@ spec:
   resources:
     requests:
       # The real storage capacity in the claim
-      storage: 1Gi
+      storage: 4Ti
   # This field must be the same as the storage class name in StorageClass
   storageClassName: sc.azurelustre.csi.azure.com

docs/examples/pvc_storageclass_subdir.yaml

Lines changed: 1 addition & 1 deletion
@@ -10,6 +10,6 @@ spec:
   resources:
     requests:
       # The real storage capacity in the claim
-      storage: 1Gi
+      storage: 4Ti
   # This field must be the same as the storage class name in StorageClass
   storageClassName: subdir.azurelustre.csi.azure.com
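As the inline comment notes, storageClassName in each claim has to match the metadata.name of the corresponding StorageClass. A hedged cross-check before applying the claims (paths assume the repo layout shown above):

# List the StorageClasses in the cluster and compare against the example claims.
kubectl get storageclass -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'
grep storageClassName docs/examples/pvc_storageclass*.yaml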

hack/verify-integration-test-aks.sh

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ function catlog {
 trap catlog ERR EXIT
 
 ./kubectl wait --for=condition=Ready pod/aml-integration-test --timeout=60s
-./kubectl wait --for=condition=Ready=false pod/aml-integration-test --timeout=300s
+./kubectl wait --for=condition=Ready=false pod/aml-integration-test --timeout=600s
 
 exit_code=$(./kubectl get pod aml-integration-test -o=jsonpath='{.status.containerStatuses[*].state.*.exitCode}')
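The integration check waits for the test pod to become Ready, then waits, now for up to 600s instead of 300s, for Ready to flip back to false (which is how a finished container surfaces), and finally reads the container exit code via jsonpath. A minimal sketch of the same wait-then-inspect pattern as a standalone helper; the function name and defaults are assumptions, not part of this script:

# Hypothetical helper mirroring the pattern above; the real script uses ./kubectl
# and a fixed pod name (aml-integration-test).
wait_and_get_exit_code () {
    local pod=$1 timeout=${2:-600s}
    kubectl wait --for=condition=Ready "pod/${pod}" --timeout=60s
    # Ready goes false once the test container exits; the longer timeout absorbs slow runs.
    kubectl wait --for=condition=Ready=false "pod/${pod}" --timeout="${timeout}"
    kubectl get pod "${pod}" -o=jsonpath='{.status.containerStatuses[*].state.*.exitCode}'
}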

test/external-e2e/run.sh

Lines changed: 3 additions & 3 deletions
@@ -60,15 +60,15 @@ echo "deploy test pvc"
 kubectl apply -f ${claim_file}
 echo "wait pvc to Bound status"
 # wait for json is supported in kubectl v1.24
-kubectl wait --for=jsonpath='{.status.phase}'=Bound -f ${claim_file} --timeout=300s
+kubectl wait --for=jsonpath='{.status.phase}'=Bound -f ${claim_file} --timeout=600s
 bounded_pv=$(kubectl get -f ${claim_file} -ojsonpath='{.spec.volumeName}')
 echo "bounded pv is ${bounded_pv}"
 echo "delete pvc"
 kubectl delete -f ${claim_file}
 echo "wait for the pvc to be deleted"
-kubectl wait --for=delete -f ${claim_file} --timeout=300s
+kubectl wait --for=delete -f ${claim_file} --timeout=600s
 echo "wait for pv ${bounded_pv} to be deleted"
-kubectl wait --for=delete pv/${bounded_pv} --timeout=300s
+kubectl wait --for=delete pv/${bounded_pv} --timeout=600s
 
 echo "delete test storageclass"
 kubectl delete -f ${sc_file}
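All three teardown waits move from 300s to 600s: waiting for the claim to reach Bound (via the jsonpath wait available since kubectl v1.24), waiting for the claim to be deleted, and waiting for the released PV to go away. A hedged sketch of the same flow factored into one helper with the longer timeout as a default; the function and variable names are illustrative, not from run.sh:

# Illustrative helper; ${claim_file} mirrors the variable used by the script above.
WAIT_TIMEOUT=${WAIT_TIMEOUT:-600s}

wait_bound_then_cleanup () {
    local claim_file=$1
    kubectl wait --for=jsonpath='{.status.phase}'=Bound -f "${claim_file}" --timeout="${WAIT_TIMEOUT}"
    local pv
    pv=$(kubectl get -f "${claim_file}" -o jsonpath='{.spec.volumeName}')
    kubectl delete -f "${claim_file}"
    kubectl wait --for=delete -f "${claim_file}" --timeout="${WAIT_TIMEOUT}"
    kubectl wait --for=delete "pv/${pv}" --timeout="${WAIT_TIMEOUT}"
}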

test/external-e2e/testdriver-azurelustre.yaml

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ StorageClass:
 DriverInfo:
   Name: azurelustre.csi.azure.com
   SupportedSizeRange:
-    Max: 8Ti
+    Max: 48Ti
     Min: 4Ti
   RequiredAccessModes:
     - ReadWriteMany
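SupportedSizeRange tells the upstream external storage e2e suite which volume sizes it may request when exercising the driver, so raising Max to 48Ti keeps the test driver consistent with the 48Ti used in the PV examples. For reference, a hedged example of how a testdriver manifest like this is commonly handed to the suite; the binary path and ginkgo focus here are assumptions, and the repo's actual invocation lives in test/external-e2e/run.sh:

# Typical upstream-style invocation (assumed, not copied from run.sh).
./e2e.test -ginkgo.v \
    -ginkgo.focus='External.Storage' \
    -storage.testdriver=test/external-e2e/testdriver-azurelustre.yaml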

test/long-haul/cleanup/cleanupjob.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ spec:
     - ReadWriteMany
   resources:
     requests:
-      storage: 1Gi
+      storage: 48Ti
   storageClassName: azurelustre-longhaulcleanup-sc
 
 ---

test/long-haul/fault-test.sh

Lines changed: 8 additions & 8 deletions
@@ -38,7 +38,7 @@ sleep $SleepInSecs
 verify_sample_workload_by_pod_status workloadPodNameNew workloadNodeNameNew
 if [[ "$workloadPodName" == "$workloadPodNameNew" ]] ; then
     print_logs_error "workload pod $workloadPodName should be killed and new workload should be started"
-    print_debug_on_ERR
+    print_debug
     fast_exit
 fi
 
@@ -92,20 +92,20 @@ print_logs_info "running 'kubectl delete po' by background task"
 sleep $SleepInSecs
 
 podState=$(get_pod_state $workloadPodName $workloadNodeName)
-if [[ -z $podState || "$podState" != "Terminating" ]]; then
-    print_logs_error "Workload pod $workloadPodName should be in Terminating state on node $workloadNodeName, but its actual state is $podState"
-    print_debug_on_ERR
+if [[ "$podState" != "Terminating" && "$podState" != "Error" ]]; then
+    print_logs_error "Workload pod $workloadPodName should be in Error/Terminating state on node $workloadNodeName, but its actual state is $podState"
+    print_debug
     fast_exit
 else
-    print_logs_info "Workload pod $workloadPodName is in Terminating state on node $workloadNodeName"
+    print_logs_info "Workload pod $workloadPodName is in Error state on node $workloadNodeName"
 fi
 
 
 print_logs_title "Verify the new workload pod in Running state on other nodes or ContainerCreating state on the same node"
 verify_sample_workload_by_pod_status workloadPodNameNew workloadNodeNameNew "Running\|ContainerCreating"
 if [[ "$workloadPodName" == "$workloadPodNameNew" ]] ; then
     print_logs_error "New workload pod should be started, but still find old running pod $workloadPodName"
-    print_debug_on_ERR
+    print_debug
     fast_exit
 else
     print_logs_info "new workload pod $workloadPodNameNew started on another node $workloadNodeNameNew"
@@ -119,7 +119,7 @@ sleep $SleepInSecs
 podState=$(get_pod_state $NodePodNameKeyword $workloadNodeName)
 if [[ -z "$podState" || "$podState" != "Running" ]]; then
     print_logs_error "Lustre CSI node pod can't be started on $nodeName, state=$podState"
-    print_debug_on_ERR
+    print_debug
     fast_exit
 else
     print_logs_info "Lustre CSI node pod started on $nodeName again"
@@ -132,7 +132,7 @@ sleep $SleepInSecs
 podState=$(get_pod_state $workloadPodName $workloadNodeName)
 if [[ ! -z $podState ]]; then
     print_logs_error "Still can find workload pod $workloadPodName in $podState state on node $workloadNodeName, it should be deleted successfully"
-    print_debug_on_ERR
+    print_debug
     fast_exit
 else
     print_logs_info "workload pod $workloadPodName has been deleted successfully from node $workloadNodeName"
