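# NOTE: the lines below are page-header text captured along with this file;
# they are not part of the workflow definition.
# Skip to content
#
# Qwen3 MoE finetune recipes (#1265) #2606
#
# Qwen3 MoE finetune recipes (#1265)
#
# Qwen3 MoE finetune recipes (#1265) #2606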

# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow definition. In the visible portion of this file only the
# `pre-flight` job is active; the build/publish jobs are commented out.
name: Build, test, and publish a PyPi wheel (to testpypi).

# Trigger: pushes to `main` and to branches matching the two patterns below.
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"

# Default shell for every `run:` step: trace commands (-x), stop on the first
# failing command (-e), treat unset variables as errors (-u), and fail a
# pipeline if any stage fails (-o pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# Token scopes granted to the jobs in this workflow.
permissions:
  id-token: write
  contents: read

# One concurrency group per workflow / ref / event; a newer run in the same
# group cancels the one in progress.
# NOTE(review): only `push` is configured above, so the `pull_request.number`
# and `label.name` fields are presumably empty and the `||` fallbacks
# (`github.ref`, 'main') apply - confirm.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true

jobs:
  pre-flight:
    # NOTE(review): `[email protected]` looks like an e-mail-obfuscation
    # artifact that replaced the reusable workflow's `<file>.yml@<ref>` suffix.
    # The real file name and ref cannot be recovered from this file; restore
    # them from the FW-CI-templates repository before relying on this job.
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
# Disabled job: build, test, and publish the wheel via the shared template.
# Strip the leading "# " from each line below to re-enable it under `jobs:`.
#   build-test-publish-wheel:
#     needs: [pre-flight]
#     if: |
#       !(needs.pre-flight.outputs.docs_only == 'true'
#       || needs.pre-flight.outputs.is_deployment_workflow == 'true')
#     # NOTE(review): `[email protected]` appears to be an obfuscation artifact
#     # replacing `<file>.yml@<ref>`; restore the real value before re-enabling.
#     uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
#     with:
#       dry-run: true
#       python-package: megatron.bridge
#       python-version: "3.10"
#       packaging: uv
#       no-publish: ${{ !(github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) }}
#       has-src-dir: true
#       skip-test-wheel: true
#       custom-container: nvcr.io/nvidia/pytorch:25.05-py3
#       runner: self-hosted-nemo
#       no-build-isolation: true
#       submodules: recursive
#       container-options: "--gpus all --runtime=nvidia"
#     secrets:
#       TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
#       TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
#       SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
#       SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
#       GH_TOKEN: ${{ secrets.PAT }}
# Disabled job: summarize the run and fail if any completed job did not succeed.
# Strip the leading "# " from each line below to re-enable it under `jobs:`.
# NOTE(review): no `env:` is visible for the step, yet the script calls `gh`
# and reads `$SKIPPING_IS_ALLOWED`; with the workflow's `bash -u` default an
# unset variable aborts the step - confirm these are provided before re-enabling.
#   build-test-publish-wheel-summary:
#     needs: [pre-flight, build-test-publish-wheel]
#     if: |
#       (
#         needs.pre-flight.outputs.docs_only == 'true'
#         || needs.pre-flight.outputs.is_deployment_workflow == 'true'
#         || always()
#       )
#       && !cancelled()
#     runs-on: ubuntu-latest
#     steps:
#       - name: Result
#         run: |
#           FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0
#           if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
#             echo "✅ All previous jobs completed successfully"
#             exit 0
#           else
#             echo "❌ Found $FAILED_JOBS failed job(s)"
#             # Show which jobs failed
#             gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
#             exit 1
#           fi