Commit d8f5ff2

Added immediate pipeline pod capture and improved workflow pod log collection in test workflows for enhanced debugging
Signed-off-by: Helber Belmiro <[email protected]>
1 parent b2beb4c commit d8f5ff2

File tree: 2 files changed (+124, -1)

.github/resources/scripts/collect-enhanced-logs.sh

Lines changed: 75 additions & 0 deletions
@@ -120,6 +120,81 @@ function collect_comprehensive_logs {
 kubectl get runs -n "${NAMESPACE}" -o wide --show-labels >> "$OUTPUT_FILE" 2>&1 || echo "No pipeline runs found or CRD not available" >> "$OUTPUT_FILE"
 echo "" >> "$OUTPUT_FILE"

+# 7. Collect logs from recently failed/completed workflow pods
+echo "===== WORKFLOW/PIPELINE POD LOGS =====" >> "$OUTPUT_FILE"
+
+# Get pods that look like pipeline execution pods (containing workflow-like names)
+local WORKFLOW_PODS
+WORKFLOW_PODS=$(kubectl get pods -n "${NAMESPACE}" -o jsonpath='{range .items[?(@.metadata.labels.workflows\.argoproj\.io/workflow)]}{.metadata.name}{" "}{.status.phase}{" "}{.metadata.labels.workflows\.argoproj\.io/workflow}{"\n"}{end}' 2>/dev/null || echo "")
+
+if [[ -n "$WORKFLOW_PODS" ]]; then
+  echo "Found Argo Workflow pods:" >> "$OUTPUT_FILE"
+  echo "$WORKFLOW_PODS" >> "$OUTPUT_FILE"
+  echo "" >> "$OUTPUT_FILE"
+
+  # Collect logs from workflow pods
+  while IFS= read -r line; do
+    if [[ -n "$line" ]]; then
+      local pod_name=$(echo "$line" | awk '{print $1}')
+      local pod_phase=$(echo "$line" | awk '{print $2}')
+      local workflow_name=$(echo "$line" | awk '{print $3}')
+
+      echo "--- Workflow Pod: $pod_name (Phase: $pod_phase, Workflow: $workflow_name) ---" >> "$OUTPUT_FILE"
+      kubectl logs "$pod_name" -n "${NAMESPACE}" --previous=false >> "$OUTPUT_FILE" 2>&1 || echo "No current logs for $pod_name" >> "$OUTPUT_FILE"
+
+      # Also try to get previous logs if pod restarted
+      kubectl logs "$pod_name" -n "${NAMESPACE}" --previous=true >> "$OUTPUT_FILE" 2>&1 || echo "No previous logs for $pod_name" >> "$OUTPUT_FILE"
+      echo "" >> "$OUTPUT_FILE"
+    fi
+  done <<< "$WORKFLOW_PODS"
+else
+  echo "No Argo Workflow pods found with workflow labels" >> "$OUTPUT_FILE"
+
+  # Fallback: look for pods with workflow-like naming patterns
+  echo "Searching for pods with workflow-like names..." >> "$OUTPUT_FILE"
+  local PATTERN_PODS
+  PATTERN_PODS=$(kubectl get pods -n "${NAMESPACE}" -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" || echo "")
+
+  if [[ -n "$PATTERN_PODS" ]]; then
+    echo "Found workflow-pattern pods:" >> "$OUTPUT_FILE"
+    for pod_name in $PATTERN_PODS; do
+      pod_name=$(echo "$pod_name" | sed 's|pod/||')
+      echo "--- Pattern Pod: $pod_name ---" >> "$OUTPUT_FILE"
+      kubectl logs "$pod_name" -n "${NAMESPACE}" --tail=100 >> "$OUTPUT_FILE" 2>&1 || echo "No logs for $pod_name" >> "$OUTPUT_FILE"
+      echo "" >> "$OUTPUT_FILE"
+    done
+  else
+    echo "No workflow-pattern pods found" >> "$OUTPUT_FILE"
+  fi
+fi
+echo "" >> "$OUTPUT_FILE"
+
+# 8. Collect all pod logs from user namespace if different and in multi-user mode
+if [[ -n "$TEST_CONTEXT" && "$TEST_CONTEXT" == *"MultiUser"* ]]; then
+  echo "===== CHECKING USER NAMESPACE PODS =====" >> "$OUTPUT_FILE"
+  # Common user namespace patterns
+  for user_ns in "kubeflow-user-example-com" "kubeflow-user-test" "default"; do
+    if kubectl get namespace "$user_ns" &>/dev/null && [[ "$user_ns" != "$NAMESPACE" ]]; then
+      echo "Found user namespace: $user_ns" >> "$OUTPUT_FILE"
+      kubectl get pods -n "$user_ns" -o wide --show-labels >> "$OUTPUT_FILE" 2>&1 || true
+
+      # Get user namespace workflow pods
+      local USER_WORKFLOW_PODS
+      USER_WORKFLOW_PODS=$(kubectl get pods -n "$user_ns" -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" || echo "")
+
+      if [[ -n "$USER_WORKFLOW_PODS" ]]; then
+        echo "User namespace workflow pods:" >> "$OUTPUT_FILE"
+        for pod_name in $USER_WORKFLOW_PODS; do
+          pod_name=$(echo "$pod_name" | sed 's|pod/||')
+          echo "--- User NS Pod: $pod_name ---" >> "$OUTPUT_FILE"
+          kubectl logs "$pod_name" -n "$user_ns" --tail=100 >> "$OUTPUT_FILE" 2>&1 || echo "No logs for $pod_name" >> "$OUTPUT_FILE"
+          echo "" >> "$OUTPUT_FILE"
+        done
+      fi
+    fi
+  done
+fi
+
 echo "Enhanced log collection completed. Output saved to: $OUTPUT_FILE"
 }
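For context, the label-based pod discovery added above can be exercised on its own before relying on the collection script; a minimal sketch (the "kubeflow" namespace is illustrative, the script itself uses "${NAMESPACE}") that prints one "name phase workflow" triple per line, which is the format the while/awk loop above parses:

    # List pods carrying the workflows.argoproj.io/workflow label (illustrative namespace)
    kubectl get pods -n kubeflow -o jsonpath='{range .items[?(@.metadata.labels.workflows\.argoproj\.io/workflow)]}{.metadata.name}{" "}{.status.phase}{" "}{.metadata.labels.workflows\.argoproj\.io/workflow}{"\n"}{end}'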

.github/workflows/e2e-test.yml

Lines changed: 49 additions & 1 deletion
@@ -168,6 +168,21 @@ jobs:
 TEST_START_TIME=$(date -u -d '30 minutes ago' '+%Y-%m-%dT%H:%M:%SZ')
 TEST_CONTEXT="${{ matrix.test_label}}_K8s-${{ matrix.k8s_version }}_cache-${{ matrix.cache_enabled }}"

+# First, immediately capture any pipeline pods that might still exist
+echo "=== IMMEDIATE PIPELINE POD CAPTURE ===" > /tmp/immediate_pod_logs.txt
+echo "Timestamp: $(date)" >> /tmp/immediate_pod_logs.txt
+echo "Searching for pipeline pods immediately after test failure..." >> /tmp/immediate_pod_logs.txt
+
+# Capture any pipeline/workflow pods that exist right now
+kubectl get pods -n $NAMESPACE | grep -E "(pipeline|workflow|producer|consumer)" >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No pipeline pods found" >> /tmp/immediate_pod_logs.txt
+
+# Get logs from any pods matching pipeline patterns
+for pod in $(kubectl get pods -n $NAMESPACE -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" | sed 's|pod/||'); do
+  echo "--- Immediate Pod Logs: $pod ---" >> /tmp/immediate_pod_logs.txt
+  kubectl logs "$pod" -n $NAMESPACE >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No logs for $pod" >> /tmp/immediate_pod_logs.txt
+  echo "" >> /tmp/immediate_pod_logs.txt
+done
+
 # Create enhanced log collection
 chmod +x ./.github/resources/scripts/collect-enhanced-logs.sh
 ./.github/resources/scripts/collect-enhanced-logs.sh \
@@ -176,6 +191,9 @@ jobs:
   --test-context "$TEST_CONTEXT" \
   --start-time "$TEST_START_TIME"

+# Combine immediate logs with enhanced logs
+cat /tmp/immediate_pod_logs.txt >> /tmp/enhanced_failure_logs.txt
+
 # Also collect Ginkgo test output if available
 if [ -f "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" ]; then
   echo "=== GINKGO TEST RESULTS ===" >> /tmp/enhanced_failure_logs.txt
@@ -305,9 +323,37 @@ jobs:
 run: |
   echo "=== Collecting enhanced logs after test failure ==="
   NAMESPACE=${{ steps.configure.outputs.NAMESPACE }}
+  USER_NS="${{ env.USER_NAMESPACE }}"
   TEST_START_TIME=$(date -u -d '30 minutes ago' '+%Y-%m-%dT%H:%M:%SZ')
   TEST_CONTEXT="MultiUser_K8s-${{ matrix.k8s_version }}_cache-${{ matrix.cache_enabled }}_storage-${{ matrix.storage }}"

+  # First, immediately capture any pipeline pods that might still exist in both namespaces
+  echo "=== IMMEDIATE PIPELINE POD CAPTURE ===" > /tmp/immediate_pod_logs.txt
+  echo "Timestamp: $(date)" >> /tmp/immediate_pod_logs.txt
+  echo "Searching for pipeline pods immediately after test failure..." >> /tmp/immediate_pod_logs.txt
+
+  # Check main namespace
+  echo "--- Main namespace ($NAMESPACE) ---" >> /tmp/immediate_pod_logs.txt
+  kubectl get pods -n $NAMESPACE | grep -E "(pipeline|workflow|producer|consumer)" >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No pipeline pods found in $NAMESPACE" >> /tmp/immediate_pod_logs.txt
+
+  for pod in $(kubectl get pods -n $NAMESPACE -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" | sed 's|pod/||'); do
+    echo "--- Immediate Pod Logs ($NAMESPACE): $pod ---" >> /tmp/immediate_pod_logs.txt
+    kubectl logs "$pod" -n $NAMESPACE >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No logs for $pod" >> /tmp/immediate_pod_logs.txt
+    echo "" >> /tmp/immediate_pod_logs.txt
+  done
+
+  # Check user namespace if different
+  if [ "$USER_NS" != "$NAMESPACE" ]; then
+    echo "--- User namespace ($USER_NS) ---" >> /tmp/immediate_pod_logs.txt
+    kubectl get pods -n "$USER_NS" | grep -E "(pipeline|workflow|producer|consumer)" >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No pipeline pods found in $USER_NS" >> /tmp/immediate_pod_logs.txt
+
+    for pod in $(kubectl get pods -n "$USER_NS" -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" | sed 's|pod/||'); do
+      echo "--- Immediate Pod Logs ($USER_NS): $pod ---" >> /tmp/immediate_pod_logs.txt
+      kubectl logs "$pod" -n "$USER_NS" >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No logs for $pod" >> /tmp/immediate_pod_logs.txt
+      echo "" >> /tmp/immediate_pod_logs.txt
+    done
+  fi
+
   # Create enhanced log collection
   chmod +x ./.github/resources/scripts/collect-enhanced-logs.sh
   ./.github/resources/scripts/collect-enhanced-logs.sh \
@@ -317,7 +363,6 @@ jobs:
   --start-time "$TEST_START_TIME"

 # Also collect user namespace logs for multi-user tests
-USER_NS="${{ env.USER_NAMESPACE }}"
 if [ "$USER_NS" != "$NAMESPACE" ]; then
   echo "=== USER NAMESPACE LOGS ===" >> /tmp/enhanced_failure_logs.txt
   ./.github/resources/scripts/collect-enhanced-logs.sh \
@@ -328,6 +373,9 @@ jobs:
   cat /tmp/user_ns_logs.txt >> /tmp/enhanced_failure_logs.txt 2>/dev/null || true
 fi

+# Combine immediate logs with enhanced logs
+cat /tmp/immediate_pod_logs.txt >> /tmp/enhanced_failure_logs.txt
+
 # Also collect Ginkgo test output if available
 if [ -f "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" ]; then
   echo "=== GINKGO TEST RESULTS ===" >> /tmp/enhanced_failure_logs.txt
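Taken together, the failure-handling step now does an immediate capture before the enhanced collection script runs, then folds both outputs into a single bundle. A condensed standalone sketch of that flow (the NAMESPACE value is illustrative; in the workflow it comes from steps.configure.outputs.NAMESPACE):

    NAMESPACE=kubeflow   # illustrative; the workflow resolves this from steps.configure.outputs.NAMESPACE
    echo "=== IMMEDIATE PIPELINE POD CAPTURE ===" > /tmp/immediate_pod_logs.txt
    for pod in $(kubectl get pods -n "$NAMESPACE" -o name 2>/dev/null | grep -E "(pipeline|workflow|producer|consumer)" | sed 's|pod/||'); do
      kubectl logs "$pod" -n "$NAMESPACE" >> /tmp/immediate_pod_logs.txt 2>&1 || echo "No logs for $pod" >> /tmp/immediate_pod_logs.txt
    done
    # After collect-enhanced-logs.sh has written /tmp/enhanced_failure_logs.txt, append the immediate capture
    cat /tmp/immediate_pod_logs.txt >> /tmp/enhanced_failure_logs.txt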
