Skip to content
Open
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
896ff1f
Add SwiftV2 long-running pipeline with scheduled tests
Nov 22, 2025
873c05e
Update readme file.
Nov 24, 2025
3395415
fix syntax for pe test.
Nov 24, 2025
b34b332
Create NSG rules with unique priority.
Dec 2, 2025
e9f50e6
Update go.mod
sivakami-projects Dec 5, 2025
8364bf5
Update test/integration/swiftv2/longRunningCluster/datapath_create_te…
sivakami-projects Dec 5, 2025
1d2ed59
Update test/integration/swiftv2/longRunningCluster/datapath_delete_te…
sivakami-projects Dec 5, 2025
efbfb02
Update test/integration/swiftv2/longRunningCluster/datapath_connectiv…
sivakami-projects Dec 5, 2025
04a22a0
Update test/integration/swiftv2/longRunningCluster/datapath_delete_te…
sivakami-projects Dec 5, 2025
56fbeb2
Error handling for private endpoint tests.
Dec 8, 2025
4d29aec
Private endpoint tests.
Dec 8, 2025
a1baf08
update private endpoint test.
Dec 8, 2025
0945c2c
update pod.yaml
Dec 8, 2025
7df1c79
Check if mtpnc is cleaned up after pods are deleted.
Dec 8, 2025
b37b033
Update vnet names.
Dec 8, 2025
2672caa
add container readiness check.
Dec 8, 2025
9d27d43
update pod.yaml
Dec 8, 2025
95ff010
Update pod.yaml
Dec 8, 2025
c1bd2e6
Update connectivity test.
Dec 8, 2025
7bdf1b0
Update netcat curl test.
Dec 9, 2025
a27aa52
Enable delete pods.
Dec 9, 2025
4f32773
Remove test changes.
Dec 9, 2025
feb46e4
remove test changes for storage accounts.
Dec 9, 2025
3b9bc5c
update go.mod
Dec 11, 2025
de09b98
Make dockerfiles.
Dec 11, 2025
d3c4686
lint fixes
Dec 12, 2025
4adcb1a
Merge branch 'master' into sv2-long-running-pipeline-stage2
sivakami-projects Dec 12, 2025
e08fa01
update dockerfiles.
Dec 12, 2025
066ba2c
Lint fix.
Dec 12, 2025
6688685
reset package name.
Dec 12, 2025
cf7173b
fix package name.
Dec 12, 2025
e51230d
refactor: clean up long-running pipeline and update tests
Dec 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pipelines/build/dockerfiles/cns.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ ENTRYPOINT ["azure-cns.exe"]
EXPOSE 10090

# mcr.microsoft.com/azurelinux/base/core:3.0
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:3d53b96f4e336a197023bda703a056eaefecc6728e9a2b0c1ef42f7dce183338 AS build-helper
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/base/core@sha256:ee7f76ce3febc06e79c1a3776178b36bea62f76da43f0d58c30d5974d0ec3dbf AS build-helper
RUN tdnf install -y iptables

# mcr.microsoft.com/azurelinux/distroless/minimal:3.0
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/distroless/minimal@sha256:6b78aa535a2a5107ee308b767c0f1f5055a58d0e751f9d87543bc504da6d0ed3 AS linux
FROM --platform=linux/${ARCH} mcr.microsoft.com/azurelinux/distroless/minimal@sha256:810f96c73cfbe47690b54eb4f3cea57ec0467e413f1fd068a234746a95a1c27e AS linux
ARG ARTIFACT_DIR .

COPY --from=build-helper /usr/sbin/*tables* /usr/sbin/
Expand Down
438 changes: 438 additions & 0 deletions .pipelines/swiftv2-long-running/README.md

Large diffs are not rendered by default.

43 changes: 24 additions & 19 deletions .pipelines/swiftv2-long-running/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,47 @@
trigger: none
pr: none

# Schedule: Run every 3 hours
schedules:
- cron: "0 */3 * * *" # Every 3 hours at minute 0
displayName: "Run tests every 3 hours"
branches:
include:
- sv2-long-running-pipeline-stage2
Comment on lines +8 to +10
Copy link
Contributor

@jpayne3506 jpayne3506 Dec 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the intent to have a separate CI/CD branch for these long running tests?

always: true # Run even if there are no code changes

parameters:
- name: subscriptionId
displayName: "Azure Subscription ID"
type: string
default: "37deca37-c375-4a14-b90a-043849bd2bf1"

- name: serviceConnection
displayName: "Azure Service Connection"
type: string
default: "Azure Container Networking - Standalone Test Service Connection"

- name: location
displayName: "Deployment Region"
type: string
default: "centraluseuap"

- name: resourceGroupName
displayName: "Resource Group Name"
type: string
default: "long-run-$(Build.BuildId)"

- name: vmSkuDefault
displayName: "VM SKU for Default Node Pool"
type: string
default: "Standard_D2s_v3"

- name: vmSkuHighNIC
displayName: "VM SKU for High NIC Node Pool"
type: string
default: "Standard_D16s_v3"
- name: runSetupStages
displayName: "Create New Infrastructure Setup"
type: boolean
default: false

- name: serviceConnection
displayName: "Azure Service Connection"
# Setup-only parameters (only used when runSetupStages=true)
- name: resourceGroupName
displayName: "Resource Group Name used when Create new Infrastructure Setup is selected"
type: string
default: "Azure Container Networking - Standalone Test Service Connection"
default: "sv2-long-run-$(Build.BuildId)"

extends:
template: template/long-running-pipeline-template.yaml
parameters:
subscriptionId: ${{ parameters.subscriptionId }}
location: ${{ parameters.location }}
resourceGroupName: ${{ parameters.resourceGroupName }}
vmSkuDefault: ${{ parameters.vmSkuDefault }}
vmSkuHighNIC: ${{ parameters.vmSkuHighNIC }}
serviceConnection: ${{ parameters.serviceConnection }}
runSetupStages: ${{ parameters.runSetupStages }}
144 changes: 100 additions & 44 deletions .pipelines/swiftv2-long-running/scripts/create_aks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,57 +7,113 @@ RG=$3
VM_SKU_DEFAULT=$4
VM_SKU_HIGHNIC=$5

CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"
DEFAULT_NODE_COUNT=1
COMMON_TAGS="fastpathenabled=true RGOwner=LongRunningTestPipelines stampcreatorserviceinfo=true"

wait_for_provisioning() { # Helper for safe retry/wait for provisioning states (basic)
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg' to reach Succeeded/Failed (polling)..."
CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"


# stamp_vnet VNET_ID
#
# Asks the subnet delegator container app to stamp the given VNet by running a
# curl PUT inside that app through `az containerapp exec`.
#   $1 - VNet resource ID (the caller passes the `id` from `az network vnet show`).
# Returns 0 as soon as a captured response contains "200 OK". After max_retries
# unsuccessful attempts it calls `exit 1`, which ends the whole script rather
# than just returning from this function.
# NOTE(review): only vnet_id is declared local; every other variable assigned
# here lands in the script's global scope.
stamp_vnet() {
local vnet_id="$1"

# File that receives the recorded output of each exec attempt.
responseFile="response.txt"
# Replace every "/" in the VNet ID with %2F so the ID fits in one URL path segment.
modified_vnet="${vnet_id//\//%2F}"
# The inner single quotes are part of the string; presumably they keep the curl
# invocation together as one --command argument once `script -c` hands the
# full command line to a shell - TODO confirm.
cmd_stamp_curl="'curl -v -X PUT http://localhost:8080/VirtualNetwork/$modified_vnet/stampcreatorservicename'"
cmd_containerapp_exec="az containerapp exec -n subnetdelegator-westus-u3h4j -g subnetdelegator-westus --subscription 9b8218f9-902a-4d20-a65c-e98acec5362f --command $cmd_stamp_curl"
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same hardcoded credentials issue. The script contains hardcoded subscription ID 9b8218f9-902a-4d20-a65c-e98acec5362f and references to subnetdelegator-westus-u3h4j container app. Consider parameterizing these values.

Copilot uses AI. Check for mistakes.

# Retry budget: up to 10 attempts with a 15 second pause after each failure.
max_retries=10
sleep_seconds=15
retry_count=0

while [[ $retry_count -lt $max_retries ]]; do
# Run the exec under `script` so its terminal output is recorded in
# $responseFile (presumably because the exec needs a TTY - TODO confirm).
script --quiet -c "$cmd_containerapp_exec" "$responseFile"
# Success is detected purely by the literal text "200 OK" in the recording.
if grep -qF "200 OK" "$responseFile"; then
echo "Subnet Delegator successfully stamped the vnet"
return 0
else
echo "Subnet Delegator failed to stamp the vnet, attempt $((retry_count+1))"
# Dump the recorded response so the failure can be diagnosed from the log.
cat "$responseFile"
retry_count=$((retry_count+1))
sleep "$sleep_seconds"
fi
done

# All attempts used up: abort the entire script, not just this function.
echo "Failed to stamp the vnet even after $max_retries attempts"
exit 1
}

wait_for_provisioning() {
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg'..."
while :; do
state=$(az aks show --resource-group "$rg" --name "$clusterName" --query provisioningState -o tsv 2>/dev/null || true)
if [ -z "$state" ]; then
sleep 3
continue
if [[ "$state" =~ Succeeded ]]; then
echo "Provisioning state: $state"
break
fi
case "$state" in
Succeeded|Succeeded*) echo "Provisioning state: $state"; break ;;
Failed|Canceled|Rejected) echo "Provisioning finished with state: $state"; break ;;
*) printf "."; sleep 6 ;;
esac
if [[ "$state" =~ Failed|Canceled ]]; then
echo "Provisioning finished with state: $state"
break
fi
sleep 6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we look at leveraging another option besides sleep

done
}


#########################################
# Main script starts here
#########################################

for i in $(seq 1 "$CLUSTER_COUNT"); do
echo "=============================="
echo " Working on cluster set #$i"
echo "=============================="

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"
echo "Creating AKS cluster '$CLUSTER_NAME' in RG '$RG'"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
NODE_COUNT=$DEFAULT_NODE_COUNT \
VM_SIZE=$VM_SKU_DEFAULT \

echo " - waiting for AKS provisioning state..."
wait_for_provisioning "$RG" "$CLUSTER_NAME"

echo "Adding multi-tenant nodepool ' to '$CLUSTER_NAME'"
make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID \
echo "Creating cluster #$i..."

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

# Create the cluster; the SkipAutoDeleteTill tag for persistent infrastructure is applied by a separate `az aks update` after creation
make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
VM_SIZE=$VM_SKU_DEFAULT

# Add SkipAutoDeleteTill tag to cluster (2032-12-31 for long-term persistence)
az aks update -g "$RG" -n "$CLUSTER_NAME" --tags SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to cluster"

wait_for_provisioning "$RG" "$CLUSTER_NAME"

vnet_id=$(az network vnet show -g "$RG" --name "$CLUSTER_NAME" --query id -o tsv)
echo "Found VNET: $vnet_id"

# Add SkipAutoDeleteTill tag to AKS VNet
az network vnet update --ids "$vnet_id" --set tags.SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to vnet"

stamp_vnet "$vnet_id"

make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID

az aks get-credentials -g "$RG" -n "$CLUSTER_NAME" --admin --overwrite-existing \
--file "/tmp/${CLUSTER_NAME}.kubeconfig"

# Label all nodes with workload-type and nic-capacity labels
echo "==> Labeling all nodes in $CLUSTER_NAME with workload-type=swiftv2-linux"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes --all workload-type=swiftv2-linux --overwrite
echo "[OK] All nodes labeled with workload-type=swiftv2-linux"

# Label default nodepool (nodepool1) with low-nic capacity
echo "==> Labeling default nodepool (nodepool1) nodes with nic-capacity=low-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nodepool1 nic-capacity=low-nic --overwrite
echo "[OK] Default nodepool nodes labeled with nic-capacity=low-nic"

# Label nplinux nodepool with high-nic capacity
echo "==> Labeling nplinux nodepool nodes with nic-capacity=high-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nplinux nic-capacity=high-nic --overwrite
echo "[OK] nplinux nodepool nodes labeled with nic-capacity=high-nic"
done
echo "All done. Created $CLUSTER_COUNT cluster set(s)."

echo "All clusters complete."
Loading
Loading