Skip to content

Small feedback updates to video curation #1594

Small feedback updates to video curation

Small feedback updates to video curation #1594

Workflow file for this run

# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow identity, triggers, and workflow-wide defaults.
name: CICD NeMo Curator

on:
  # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 0 * * *"
  # Pushes to `main` and to branches named `pull-request/<digits>`.
  push:
    branches:
      - main
      - "pull-request/[0-9]+"

# Runs sharing the same workflow / PR-number-or-ref / label / event key are
# serialised; a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true

# Default token permissions for every job that does not override them.
permissions:
  id-token: write
  contents: read

env:
  UV_HTTP_TIMEOUT: 60

jobs:
# Pre-flight: inspects the triggering ref and the changed files, then publishes
# the flags (`is_ci_workload`, `no_fail_fast`, `docs_only`) that later jobs read.
pre-flight:
  runs-on: ubuntu-latest
  outputs:
    is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
    no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
    # Expression result; downstream jobs compare it against 'true' / 'false'.
    docs_only: ${{ steps.docs_only.outputs.main == 'true' }}
  env:
    # NOTE(review): the visible triggers (schedule, push) declare no `inputs`,
    # and no step below reads TESTS_TO_RUN - confirm whether it is still needed.
    TESTS_TO_RUN: ${{ inputs.test_to_run }}
    EVENT_NAME: ${{ github.event_name }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        # Full history so the changed-files diff can reach the base reference.
        fetch-depth: 0
        submodules: recursive

    # Only runs for `pull-request/<n>` branches.
    - name: Get PR info
      id: get-pr-info
      if: startsWith(github.ref, 'refs/heads/pull-request/')
      uses: nv-gha-runners/get-pr-info@main

    # Diff base: the PR's base ref on pull-request branches, otherwise the
    # previous commit. The expression is passed through `env` rather than
    # being interpolated into the script text.
    - name: Determine base reference
      id: base-ref
      env:
        BASE_REF: ${{ (startsWith(github.ref, 'refs/heads/pull-request/') && fromJSON(steps.get-pr-info.outputs.pr-info).base.ref) || 'HEAD~1' }}
      run: |
        echo "base=$BASE_REF" >> "$GITHUB_OUTPUT"

    # TODO(review): the action ref was unreadable in the reviewed copy (it had
    # been replaced by an e-mail-obfuscation placeholder); `v45.0.1` is an
    # assumption - confirm the pinned version against the repository.
    - name: Get changed files
      id: changed-files
      uses: step-security/changed-files@v45.0.1
      with:
        files: |
          nemo_curator/**
          .github/**
          pyproject.toml
          docker/**
          tests/**
        base_sha: ${{ steps.base-ref.outputs.base }}

    # "Docs only" means none of the paths listed above changed.
    - name: Check if docs only
      shell: bash
      id: docs_only
      env:
        DOCS_ONLY: ${{ steps.changed-files.outputs.any_changed == 'false' }}
      run: |
        echo "main=$DOCS_ONLY" | tee -a "$GITHUB_OUTPUT"

    # CI workloads are scheduled runs and branches starting with `bump-ci-container`.
    # The output is written exactly once, after the branch decision.
    - name: Check if this is a CI workload
      shell: bash
      id: is_ci_workload
      run: |
        branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
        if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
          is_ci_workload=true
        else
          is_ci_workload=false
        fi
        echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"

    # NOTE(review): the label lookup reads `github.event.pull_request`, which
    # is presumably absent for the schedule/push triggers of this workflow -
    # confirm the label branch can ever be true.
    - name: Check if no-fail-fast is set
      shell: bash
      id: no_fail_fast
      env:
        HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
      run: |
        if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
          no_fail_fast=true
        else
          no_fail_fast=false
        fi
        echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"
# Queue gate: a no-op job bound to the `test` environment. It is skipped for
# CI workloads and for docs-only changes.
# Only outputs that pre-flight actually declares are consulted here.
cicd-wait-in-queue:
  needs: [pre-flight]
  runs-on: ubuntu-latest
  # presumably the `test` environment carries the approval / wait rules - verify
  # in the repository settings.
  environment: test
  if: |
    needs.pre-flight.outputs.is_ci_workload == 'false'
    && needs.pre-flight.outputs.docs_only == 'false'
  steps:
    - name: Running CI tests
      run: |
        echo "Running CI tests"
# CPU unit tests: one job per (python-version, test folder) pair. Each job runs
# pytest under coverage with the `not gpu` marker and uploads its coverage data.
cicd-cpu-tests:
  strategy:
    fail-fast: false
    matrix:
      os: [ubuntu-latest]
      # Quoted so that 3.10 is not parsed as the float 3.1.
      python-version: ["3.10", "3.12"]
      # A `stages-<x>` entry maps to the directory tests/stages/<x> (see the run step).
      folder:
        - "backends"
        - "core"
        - "models"
        - "pipelines"
        - "stages-audio"
        - "stages-common"
        - "stages-deduplication"
        - "stages-image"
        - "stages-synthetic"
        - "stages-text"
        - "stages-video"
        - "tasks"
        - "utils"
  needs: [pre-flight, cicd-wait-in-queue]
  runs-on: ${{ matrix.os }}
  name: Unit_Test_${{ matrix.folder}}_CPU_python-${{ matrix.python-version }}
  environment: nemo-ci
  # Run when every needed job succeeded, or when this is a CI workload (in
  # which case cicd-wait-in-queue was skipped); never after a cancellation.
  if: |
    (
      success()
      || needs.pre-flight.outputs.is_ci_workload == 'true'
    )
    && !cancelled()
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive

    # InternVideo is checked out at a pinned commit and patched in the test step.
    - uses: actions/checkout@v4
      with:
        repository: OpenGVLab/InternVideo
        path: InternVideo
        ref: 09d872e5093296c6f36b8b3a91fc511b76433bf7

    - name: Optionally free up space on Ubuntu
      run: |
        sudo rm -rf /usr/share/dotnet
        sudo rm -rf /opt/ghc
        sudo rm -rf "/usr/local/share/boost"
        sudo rm -rf "$AGENT_TOOLSDIRECTORY"

    - name: Set up uv with Python ${{ matrix.python-version }}
      uses: astral-sh/setup-uv@v6
      with:
        python-version: ${{ matrix.python-version }}

    - name: Run tests ${{ matrix.folder }} (CPU)
      timeout-minutes: 30
      run: |
        cd InternVideo && patch -p1 < ../external/intern_video2_multimodal.patch && cd .. && sed -i "/InternVideo/d" .gitignore
        uv sync --link-mode copy --locked --extra audio_cpu --extra text_cpu --extra video_cpu --group test
        uv add InternVideo/InternVideo2/multi_modality
        FOLDER="${{ matrix.folder }}"
        FOLDER="${FOLDER/stages-/stages/}"
        uv run coverage run --branch --source=nemo_curator -m pytest -v "tests/$FOLDER" -m "not gpu"

    # The artifact name carries a UUID so that names never collide.
    - name: Generate report
      id: check
      shell: bash
      run: |
        uv run coverage xml
        uv run coverage report
        coverage_report=coverage-unit-test-${{ matrix.folder }}-${{ github.run_id }}-$(uuidgen)
        echo "$coverage_report"
        echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT"

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      if: ${{ steps.check.outputs.coverage_report != 'none' }}
      with:
        name: ${{ steps.check.outputs.coverage_report }}
        path: |
          ./coverage.xml
          ./.coverage
        # `.coverage` is a dot-file and would otherwise be left out.
        include-hidden-files: true
# GPU tests: runs the repository's test-template action on the self-hosted runner
# after the CPU tests.
cicd-gpu-tests:
  strategy:
    fail-fast: false
    matrix:
      include:
        - script: L0_Unit_Test_GPU
          runner: self-hosted-nemo
  # `pre-flight` must be a direct dependency: the `needs` context only exposes
  # outputs of jobs listed here, and the condition below reads
  # `needs.pre-flight.outputs.is_ci_workload`.
  needs: [pre-flight, cicd-cpu-tests]
  runs-on: ${{ matrix.runner }}
  name: ${{ matrix.script }}
  environment: nemo-ci
  # Run when every needed job succeeded, or when this is a CI workload;
  # never after a cancellation.
  if: |
    (
      success()
      || needs.pre-flight.outputs.is_ci_workload == 'true'
    )
    && !cancelled()
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: recursive

    - name: main
      uses: ./.github/actions/test-template
      with:
        script: ${{ matrix.script }}
        is_unit_test: "false"
        has-azure-credentials: "true"
        azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
        azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
        azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
        PAT: ${{ secrets.PAT }}
        # presumably minutes; verify against the test-template action's inputs.
        timeout: 20
# Summary job: inspects the conclusions of this run's jobs, reduces them to a
# single result (success / cancelled / failure), optionally notifies, and exits
# non-zero unless the result is "success".
Nemo_CICD_Test:
  needs:
    - pre-flight
    - cicd-cpu-tests
    - cicd-gpu-tests
  # NOTE(review): with this condition the job is skipped whenever a needed job
  # failed on a non-docs-only change, so the "failure" handling below may be
  # unreachable in practice - confirm this is intended.
  if: |
    (
      needs.pre-flight.outputs.docs_only == 'true'
      || success()
    )
    && !cancelled()
  runs-on: ubuntu-latest
  # NOTE(review): `write-all` is broader than the steps visibly need (reading
  # the run, editing PR labels, commenting); consider narrowing once the needs
  # of .github/scripts/notify.py are confirmed.
  permissions: write-all
  steps:
    # Single checkout; it provides the repository context for the `gh` CLI
    # calls and the notify script used by later steps.
    - name: Checkout
      uses: actions/checkout@v4

    - name: Get workflow result
      id: result
      env:
        GH_TOKEN: ${{ github.token }}
        RUN_ID: ${{ github.run_id }}
        DOCS_ONLY: ${{ needs.pre-flight.outputs.docs_only }}
      run: |
        # Get workflow run details and check job conclusions
        NUM_FAILED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
        NUM_CANCELLED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')
        # Docs-only changes always count as success.
        if [[ ($NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0) || $DOCS_ONLY == 'true' ]]; then
          RESULT="success"
        elif [[ $NUM_CANCELLED -gt 0 ]]; then
          RESULT="cancelled"
        else
          RESULT="failure"
        fi
        # Output the final status
        echo "code=$RESULT" | tee -a "$GITHUB_OUTPUT"

    # NOTE(review): this step and the two notification steps below read
    # label / pull_request fields of the event payload; the workflow is
    # triggered only by schedule and push - confirm they can still fire.
    - name: Remove label if not cancelled
      if: |
        steps.result.outputs.code != 'cancelled'
        && github.event.label.name == 'Run CICD'
        && github.event.pull_request.head.repo.full_name == github.repository
      env:
        GH_TOKEN: ${{ github.token }}
        PR_NUMBER: ${{ github.event.number }}
      run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"

    - name: Pipeline successful, add PR comment
      if: |
        steps.result.outputs.code == 'success'
        && github.event_name == 'pull_request'
        && env.SLACK_WEBHOOK != ''
      uses: peter-evans/create-or-update-comment@v4
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        REPOSITORY: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
      with:
        issue-number: ${{ github.event.number }}
        body: |
          [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
          We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
          So it might be time to merge this PR or get some approvals.
          //cc @NVIDIA-NeMo/automation

    - name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
      if: |
        steps.result.outputs.code == 'failure'
        && github.event.label.name == 'Run CICD'
        && env.SLACK_WEBHOOK != ''
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        REPOSITORY: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
        PR_NUMBER: ${{ github.event.number }}
        SERVER_URL: ${{ github.server_url }}
      run: |
        set -x
        pip install PyGithub
        export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
        python .github/scripts/notify.py

    # Runs even after an earlier step failed, in which case RESULT may be
    # empty; the quoted comparison then falls through to `exit 1` cleanly.
    - name: Exit
      if: ${{ always() }}
      env:
        RESULT: ${{ steps.result.outputs.code }}
      run: |
        if [ "$RESULT" = "success" ]; then
          exit 0
        else
          exit 1
        fi
# Coverage aggregation: for each flag, downloads the matching per-job coverage
# artifacts, combines them into one `.coverage` file, uploads it to Codecov and
# stores it as an artifact.
Coverage:
  runs-on: ubuntu-latest
  needs: [pre-flight, Nemo_CICD_Test]
  # Skipped for docs-only changes and after a cancellation.
  if: |
    needs.pre-flight.outputs.docs_only == 'false'
    && (
      success()
      || needs.Nemo_CICD_Test.result == 'success'
    )
    && !cancelled()
  strategy:
    matrix:
      flag: [unit-test, e2e]
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # Each matching artifact is expected in its own `coverage-<flag>-*` directory
    # (the listing and combine commands below rely on that layout).
    - name: Download coverage reports of current branch
      uses: actions/download-artifact@v4
      with:
        pattern: coverage-${{ matrix.flag }}-*

    - name: Get total coverage of current branch
      shell: bash -x -e -u -o pipefail {0}
      if: always()
      run: |
        # Quoted so the shell never treats `[toml]` as a glob pattern.
        pip install "coverage[toml]"
        ls -al .
        ls -al coverage-*/
        # Merge every downloaded data file; --keep leaves the inputs in place.
        coverage combine --keep coverage-*/.coverage
        coverage report -i
        rm -rf coverage-*
        ls -al

    - name: Upload coverage reports to Codecov
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        verbose: true
        flags: ${{ matrix.flag }}

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: coverage-${{ matrix.flag }}-aggregated
        path: |
          .coverage
        # `.coverage` is a dot-file and would otherwise be left out.
        include-hidden-files: true
# Stand-in job reported under the name `codecov/patch`; it only runs when
# pre-flight classified the change as docs-only.
codecov-placeholder:
  name: codecov/patch
  runs-on: ubuntu-latest
  needs:
    - pre-flight
  if: ${{ needs.pre-flight.outputs.docs_only == 'true' }}
  steps:
    - name: codecov_placeholder
      run: echo "This is a placeholder status check for when no tests are ran but the codecov status is expected"