Enabling ZeroBubbleV schedule in GraphPP (#250) #685
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Test TorchTitan Integration | |
| on: | |
| pull_request: | |
| push: | |
| branches: | |
| - main | |
| - release/* | |
| concurrency: | |
| group: test-torchtitan-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} | |
| cancel-in-progress: true | |
| jobs: | |
| test-torchtitan: | |
| name: Test TorchTitan Integration (cuda12.6-py3.12) | |
| uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
| strategy: | |
| fail-fast: true | |
| matrix: | |
| include: | |
| - name: 12xlargegpu | |
| runs-on: linux.g5.12xlarge.nvidia.gpu | |
| torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126' | |
| gpu-arch-type: "cuda" | |
| gpu-arch-version: "12.6" | |
| with: | |
| timeout: 60 | |
| runner: ${{ matrix.runs-on }} | |
| gpu-arch-type: ${{ matrix.gpu-arch-type }} | |
| gpu-arch-version: ${{ matrix.gpu-arch-version }} | |
| submodules: recursive | |
| script: | | |
| conda create --yes --quiet --name py312 python=3.12 | |
| source $(conda info --base)/etc/profile.d/conda.sh | |
| conda activate py312 | |
| pip install --quiet -r requirements-test.txt | |
| # For some reason the torch-spec above isn't working, so reinstall the nightly torch build explicitly | |
| pip uninstall -y torch | |
| pip install --no-input --quiet --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126 | |
| pip install --quiet . | |
| # Clone the autoparallel branch of TorchTitan | |
| git clone --branch autoparallel https://github.com/pytorch/torchtitan.git | |
| cd torchtitan | |
| pip install --quiet -r requirements.txt | |
| # Run TorchTitan training with AutoParallel | |
| NGPU=4 CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh \ | |
| --model.name auto_parallel.llama3 \ | |
| --parallelism.tensor_parallel_degree 4 \ | |
| --training.dataset c4 \ | |
| --compile.enable |