Draft
Changes from all commits (66 commits)
896ff1f
Add SwiftV2 long-running pipeline with scheduled tests
Nov 22, 2025
873c05e
Update readme file.
Nov 24, 2025
3395415
fix syntax for pe test.
Nov 24, 2025
b34b332
Create NSG rules with unique priority.
Dec 2, 2025
f0d74b6
Scale tests - create 15 pods across 2 clusters.
Dec 2, 2025
e9f50e6
Update go.mod
sivakami-projects Dec 5, 2025
8364bf5
Update test/integration/swiftv2/longRunningCluster/datapath_create_te…
sivakami-projects Dec 5, 2025
1d2ed59
Update test/integration/swiftv2/longRunningCluster/datapath_delete_te…
sivakami-projects Dec 5, 2025
efbfb02
Update test/integration/swiftv2/longRunningCluster/datapath_connectiv…
sivakami-projects Dec 5, 2025
04a22a0
Update test/integration/swiftv2/longRunningCluster/datapath_delete_te…
sivakami-projects Dec 5, 2025
56fbeb2
Error handling for private endpoint tests.
Dec 8, 2025
a82dc01
Merge branch 'sv2-long-running-pipeline-stage2' into sv2-long-running…
Dec 8, 2025
0abf219
Fix scale tests.
Dec 8, 2025
ad85ecc
private endpoint tests.
Dec 8, 2025
8266188
remove duplicate statements.
Dec 8, 2025
b50005d
Private endpoint tests.
Dec 8, 2025
afa8280
wait for pods to be scheduled in scale tests.
Dec 8, 2025
fef7708
update pod image.
Dec 8, 2025
5abacae
Private endpoint tests.
Dec 8, 2025
df1dc33
update private endpoint test.
Dec 8, 2025
03bd24b
Private endpoint tests.
Dec 8, 2025
35a68da
update private endpoint test.
Dec 8, 2025
f74b1b2
update pod.yaml
Dec 8, 2025
e04da22
Check if mtpnc is cleaned up after pods are deleted.
Dec 8, 2025
595a3c5
Update vnet names.
Dec 8, 2025
107f2c3
add container readiness check.
Dec 8, 2025
8a773ff
update pod.yaml
Dec 8, 2025
e9bdcfb
Update pod.yaml
Dec 8, 2025
f751a45
Update connectivity test.
Dec 8, 2025
ff2c7b3
Update netcat curl test.
Dec 9, 2025
c3d2743
Remove test changes.
Dec 9, 2025
73471f7
update datapath.go
Dec 9, 2025
e8d62bf
Fix vnet names.
Dec 9, 2025
93fbd68
Run scale tests after private endpoint tests.
Dec 9, 2025
80a86c0
start with small bursts for scale tests.
Dec 10, 2025
b2c2fae
Set reservation size base test scenario.
Dec 10, 2025
f7dc949
Delete pod resources created for scale tests.
Dec 10, 2025
f532afa
test change.
Dec 10, 2025
434cd06
Reuse pod network for creating pods for scale tests.
Dec 10, 2025
8b71f93
scale test update
Dec 10, 2025
a00bb45
fix imports
Dec 10, 2025
5580154
Specify pod count per node.
Dec 10, 2025
074d593
scale test increase pod count.
Dec 11, 2025
dd5beed
Private endpoint tests.
Dec 8, 2025
789ed59
update pod.yaml
Dec 8, 2025
be5612d
Check if mtpnc is cleaned up after pods are deleted.
Dec 8, 2025
8dc9e4e
add container readiness check.
Dec 8, 2025
6dff2ca
update pod.yaml
Dec 8, 2025
c0f3841
Update pod.yaml
Dec 8, 2025
236a874
Update connectivity test.
Dec 8, 2025
6b1cb81
Update netcat curl test.
Dec 9, 2025
8498946
Enable delete pods.
Dec 9, 2025
2ba3f3b
Remove test changes.
Dec 9, 2025
59a0826
remove test changes for storage accounts.
Dec 9, 2025
4fed155
update go.mod
Dec 11, 2025
e7c9933
Make dockerfiles.
Dec 11, 2025
62347dc
lint fixes
Dec 12, 2025
58f35f3
Lint fix.
Dec 12, 2025
23bd5f6
Merge branch 'master' into sv2-long-running-pipeline-scaletests
sivakami-projects Dec 12, 2025
0d161af
make dockerfiles.
Dec 12, 2025
86d1c37
fix generateSaStoken method.
Dec 12, 2025
d71032f
linter fix.
Dec 12, 2025
89c79ad
Fix display name
Dec 12, 2025
743a3f8
fix package name.
Dec 12, 2025
e21cbec
Enable scale tests.
Dec 12, 2025
bb13603
set pipeline job order.
Dec 12, 2025
4 changes: 2 additions & 2 deletions .pipelines/build/dockerfiles/cns.Dockerfile
@@ -11,11 +11,11 @@ ENTRYPOINT ["azure-cns.exe"]
EXPOSE 10090

# mcr.microsoft.com/azurelinux/base/core:3.0
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:3d53b96f4e336a197023bda703a056eaefecc6728e9a2b0c1ef42f7dce183338 AS build-helper
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:ee7f76ce3febc06e79c1a3776178b36bea62f76da43f0d58c30d5974d0ec3dbf AS build-helper
RUN tdnf install -y iptables

# mcr.microsoft.com/azurelinux/distroless/minimal:3.0
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/distroless/minimal@sha256:6b78aa535a2a5107ee308b767c0f1f5055a58d0e751f9d87543bc504da6d0ed3 AS linux
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/distroless/minimal@sha256:810f96c73cfbe47690b54eb4f3cea57ec0467e413f1fd068a234746a95a1c27e AS linux
ARG ARTIFACT_DIR .

COPY --from=build-helper /usr/sbin/*tables* /usr/sbin/
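The base images in this Dockerfile are pinned by digest, with the human-readable tag kept only as a comment, so bumping them means resolving the tag to its current digest first. A minimal sketch of that lookup, assuming Docker Buildx is available locally:

# Print the manifest-list digest that each tag currently resolves to, then paste it into the FROM lines.
docker buildx imagetools inspect mcr.microsoft.com/azurelinux/base/core:3.0
docker buildx imagetools inspect mcr.microsoft.com/azurelinux/distroless/minimal:3.0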
438 changes: 438 additions & 0 deletions .pipelines/swiftv2-long-running/README.md

Large diffs are not rendered by default.

45 changes: 25 additions & 20 deletions .pipelines/swiftv2-long-running/pipeline.yaml
@@ -1,42 +1,47 @@
trigger: none
pr: none

# Schedule: Run every 2 hours
schedules:
- cron: "0 */2 * * *" # Every 2 hours at minute 0
displayName: "Run tests every 2 hours"
branches:
include:
- sv2-long-running-pipeline-scaletests
always: true # Run even if there are no code changes

parameters:
- name: subscriptionId
displayName: "Azure Subscription ID"
type: string
default: "37deca37-c375-4a14-b90a-043849bd2bf1"

- name: location
displayName: "Deployment Region"
type: string
default: "centraluseuap"

- name: resourceGroupName
displayName: "Resource Group Name"
- name: serviceConnection
displayName: "Azure Service Connection"
type: string
default: "long-run-$(Build.BuildId)"
default: "Azure Container Networking - Standalone Test Service Connection"

- name: vmSkuDefault
displayName: "VM SKU for Default Node Pool"
- name: location
displayName: "Deployment Region"
type: string
default: "Standard_D2s_v3"
default: "eastus2"

- name: vmSkuHighNIC
displayName: "VM SKU for High NIC Node Pool"
type: string
default: "Standard_D16s_v3"
- name: runSetupStages
displayName: "Create New Infrastructure Setup"
type: boolean
default: false

- name: serviceConnection
displayName: "Azure Service Connection"
# Setup-only parameters (only used when runSetupStages=true)
- name: resourceGroupName
displayName: "Resource Group Name used when Create new Infrastructure Setup is selected"
type: string
default: "Azure Container Networking - Standalone Test Service Connection"
default: "sv2-long-run-$(Build.BuildId)"

extends:
template: template/long-running-pipeline-template.yaml
parameters:
subscriptionId: ${{ parameters.subscriptionId }}
location: ${{ parameters.location }}
resourceGroupName: ${{ parameters.resourceGroupName }}
vmSkuDefault: ${{ parameters.vmSkuDefault }}
vmSkuHighNIC: ${{ parameters.vmSkuHighNIC }}
serviceConnection: ${{ parameters.serviceConnection }}
runSetupStages: ${{ parameters.runSetupStages }}
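Besides the two-hour schedule, the pipeline can be queued by hand, which is how runSetupStages can be set to true to build fresh infrastructure instead of reusing the long-lived clusters. A rough sketch using the Azure DevOps CLI; the organization, project, and pipeline name below are placeholders, not values taken from this change:

# Queue a manual run that provisions new clusters rather than reusing the existing ones.
az pipelines run \
  --organization https://dev.azure.com/<org> \
  --project <project> \
  --name <swiftv2-long-running-pipeline> \
  --branch sv2-long-running-pipeline-scaletests \
  --parameters runSetupStages=true location=eastus2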
146 changes: 102 additions & 44 deletions .pipelines/swiftv2-long-running/scripts/create_aks.sh
@@ -7,57 +7,115 @@ RG=$3
VM_SKU_DEFAULT=$4
VM_SKU_HIGHNIC=$5

CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"
DEFAULT_NODE_COUNT=1
COMMON_TAGS="fastpathenabled=true RGOwner=LongRunningTestPipelines stampcreatorserviceinfo=true"

wait_for_provisioning() { # Helper for safe retry/wait for provisioning states (basic)
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg' to reach Succeeded/Failed (polling)..."
PODS_PER_NODE=7
CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"


stamp_vnet() {
local vnet_id="$1"

responseFile="response.txt"
modified_vnet="${vnet_id//\//%2F}"
cmd_stamp_curl="'curl -v -X PUT http://localhost:8080/VirtualNetwork/$modified_vnet/stampcreatorservicename'"
cmd_containerapp_exec="az containerapp exec -n subnetdelegator-westus-u3h4j -g subnetdelegator-westus --subscription 9b8218f9-902a-4d20-a65c-e98acec5362f --command $cmd_stamp_curl"

max_retries=10
sleep_seconds=15
retry_count=0

while [[ $retry_count -lt $max_retries ]]; do
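# 'az containerapp exec' opens an interactive session, so its output is awkward to capture
# with plain redirection; wrapping it in 'script' records the whole terminal session into
# $responseFile, which is then grepped for the "200 OK" returned by the stamping endpoint.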
script --quiet -c "$cmd_containerapp_exec" "$responseFile"
if grep -qF "200 OK" "$responseFile"; then
echo "Subnet Delegator successfully stamped the vnet"
return 0
else
echo "Subnet Delegator failed to stamp the vnet, attempt $((retry_count+1))"
cat "$responseFile"
retry_count=$((retry_count+1))
sleep "$sleep_seconds"
fi
done

echo "Failed to stamp the vnet even after $max_retries attempts"
exit 1
}

wait_for_provisioning() {
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg'..."
while :; do
state=$(az aks show --resource-group "$rg" --name "$clusterName" --query provisioningState -o tsv 2>/dev/null || true)
if [ -z "$state" ]; then
sleep 3
continue
if [[ "$state" =~ Succeeded ]]; then
echo "Provisioning state: $state"
break
fi
case "$state" in
Succeeded|Succeeded*) echo "Provisioning state: $state"; break ;;
Failed|Canceled|Rejected) echo "Provisioning finished with state: $state"; break ;;
*) printf "."; sleep 6 ;;
esac
if [[ "$state" =~ Failed|Canceled ]]; then
echo "Provisioning finished with state: $state"
break
fi
sleep 6
done
}


#########################################
# Main script starts here
#########################################

for i in $(seq 1 "$CLUSTER_COUNT"); do
echo "=============================="
echo " Working on cluster set #$i"
echo "=============================="

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"
echo "Creating AKS cluster '$CLUSTER_NAME' in RG '$RG'"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
NODE_COUNT=$DEFAULT_NODE_COUNT \
VM_SIZE=$VM_SKU_DEFAULT \

echo " - waiting for AKS provisioning state..."
wait_for_provisioning "$RG" "$CLUSTER_NAME"

echo "Adding multi-tenant nodepool ' to '$CLUSTER_NAME'"
make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID \
echo "Creating cluster #$i..."

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

# Create cluster with SkipAutoDeleteTill tag for persistent infrastructure
make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
VM_SIZE=$VM_SKU_DEFAULT

# Add SkipAutoDeleteTill tag to cluster (2032-12-31 for long-term persistence)
az aks update -g "$RG" -n "$CLUSTER_NAME" --tags SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to cluster"

wait_for_provisioning "$RG" "$CLUSTER_NAME"

vnet_id=$(az network vnet show -g "$RG" --name "$CLUSTER_NAME" --query id -o tsv)
echo "Found VNET: $vnet_id"

# Add SkipAutoDeleteTill tag to AKS VNet
az network vnet update --ids "$vnet_id" --set tags.SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to vnet"

stamp_vnet "$vnet_id"

make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
PODS_PER_NODE=$PODS_PER_NODE \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID

az aks get-credentials -g "$RG" -n "$CLUSTER_NAME" --admin --overwrite-existing \
--file "/tmp/${CLUSTER_NAME}.kubeconfig"

# Label all nodes with workload-type and nic-capacity labels
echo "==> Labeling all nodes in $CLUSTER_NAME with workload-type=swiftv2-linux"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes --all workload-type=swiftv2-linux --overwrite
echo "[OK] All nodes labeled with workload-type=swiftv2-linux"

# Label default nodepool (nodepool1) with low-nic capacity
echo "==> Labeling default nodepool (nodepool1) nodes with nic-capacity=low-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nodepool1 nic-capacity=low-nic --overwrite
echo "[OK] Default nodepool nodes labeled with nic-capacity=low-nic"

# Label nplinux nodepool with high-nic capacity
echo "==> Labeling nplinux nodepool nodes with nic-capacity=high-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nplinux nic-capacity=high-nic --overwrite
echo "[OK] nplinux nodepool nodes labeled with nic-capacity=high-nic"
done
echo "All done. Created $CLUSTER_COUNT cluster set(s)."

echo "All clusters complete."