# Run tests on TPU
#
# Workflow file for run #11 of "Run tests on TPU".

# TPU test workflow: triggers, token permissions, and shared environment values.
name: TPU Tests

# Run on pushes to master, on every pull request, and when a release is created.
on:
  push:
    branches:
      - master
  pull_request:
  release:
    types:
      - created

# Scopes granted to the automatically created GITHUB_TOKEN.
permissions:
  contents: read  # Read access is all that checkout needs
  packages: write  # Lets the workflow push to the repository's package registry

# Base environment variables, defined once at the workflow level.
# Steps can read them, but they cannot be used for the container image definition.
env:
  PROJECT_ID: gtech-rmi-dev
  GAR_LOCATION: us-central1
  GAR_REPO: keras-docker-images
  IMAGE_NAME: keras-jax-tpu-amd64
  IMAGE_TAG: latest
jobs:
  # Builds the Docker image from the repository root and publishes it to GHCR
  # so that the TPU job below can run inside it.
  #
  # NOTE(review): for pull requests opened from forks GITHUB_TOKEN is
  # read-only, so the push step is expected to be denied - confirm whether
  # fork PRs should skip this workflow.
  build-and-push:
    name: Build and Push to GHCR
    runs-on: ubuntu-latest  # This job doesn't need the special TPU runner
    # Publish the exact image reference that was pushed, so the test job pulls
    # the same (lowercased) name instead of re-deriving it from contexts.
    outputs:
      image: ${{ steps.image.outputs.ref }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Compute Image Reference
        id: image
        env:
          REPOSITORY: ${{ github.repository }}
        # Registry image names must be lowercase, but `github.repository`
        # keeps the owner/repo casing; normalise it once before tagging.
        # The tag is the commit SHA, which is unique to this specific build.
        run: |
          repository="$(printf '%s' "$REPOSITORY" | tr '[:upper:]' '[:lower:]')"
          echo "ref=ghcr.io/${repository}:${GITHUB_SHA}" >> "$GITHUB_OUTPUT"

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          # GITHUB_TOKEN is automatically created by Actions and has
          # permissions to push to your repo's package registry.
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and Push Docker Image
        uses: docker/build-push-action@v6
        with:
          context: .
          # Push the image to ghcr.io
          push: true
          # Tag with the reference computed above (ghcr.io/<owner>/<repo>:<sha>)
          tags: ${{ steps.image.outputs.ref }}

  # Runs the verification commands on the TPU runner, inside the image that
  # build-and-push just published.
  test-in-container:
    name: Test in Custom Container
    # This job must run after the build-and-push job is complete
    needs: build-and-push
    # Use the required TPU runner
    runs-on: linux-x86-ct6e-44-1tpu
    # CRITICAL: Use the container image we just pushed in the previous job.
    # This satisfies the runner's requirement for a container to be specified.
    container:
      image: ${{ needs.build-and-push.outputs.image }}
      # Authenticate the pull so it also succeeds when the GHCR package is
      # private; the workflow-level `packages: write` scope covers reading.
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --privileged --network host
    steps:
      # We need the code available inside the container's workspace to run tests.
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Run Verification and Tests
        run: |
          echo "Successfully running inside the custom container from GHCR!"
          echo "Current working directory:"
          pwd
          echo "Contents of current directory:"
          ls -la
          echo "Verifying JAX installation..."
          python3 -c "import jax; print(f'JAX backend: {jax.default_backend()}'); print(f'JAX devices: {jax.devices()}')"
# pull-and-use-image:
# name: Pull & Use Image from GAR
# runs-on: linux-x86-ct6e-44-1tpu
# # The container image path must be a complete string.
# # The 'env' context is not available here.
# # Note the corrected GAR path format: 'us-central1-docker.pkg.dev'
# container:
# # image: us-central1-docker.pkg.dev/gtech-rmi-dev/keras-docker-images/keras-jax-tpu-amd64:latest
# image: ubuntu:22.04
# # options: --privileged --network host
# steps:
# - name: Verify Container Environment
# run: |
# echo "Successfully started the public container! ✅"
# echo "OS Details:"
# cat /etc/os-release
# - name: Checkout Repository
# uses: actions/checkout@v4
# - name: Verify Environment inside Container
# run: |
# echo "Current working directory: $(pwd)"
# echo "Contents of current directory:"
# ls -la
# # Verify Python, JAX, etc., inside the container
# echo "Verifying JAX installation..."
# python3 -c "import jax; print(f'JAX backend: {jax.default_backend()}'); print(f'JAX devices: {jax.devices()}')"
# - name: Authenticate to Google Cloud with Service Account Key
# id: 'auth'
# uses: 'google-github-actions/auth@v2'
# with:
# # Pass the content of your GitHub Secret directly here.
# credentials_json: '${{ secrets.GCP_SA_KEY }}'
# - name: Configure Docker to use Google Artifact Registry
# run: |
# echo "Configuring Docker to authenticate with Google Artifact Registry..."
# # This command uses the credentials set by the 'auth' step to configure Docker.
# gcloud auth configure-docker ${{ env.GAR_LOCATION }}-docker.pkg.dev
# echo "Docker configured."
# - name: Pull Docker Image from Artifact Registry
# run: |
# FULL_IMAGE_PATH="${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ env.PROJECT_ID }}/${{ env.GAR_REPO }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}"
# echo "Attempting to pull image: $FULL_IMAGE_PATH"
# docker pull "$FULL_IMAGE_PATH"
# echo "Successfully pulled image: $FULL_IMAGE_PATH"
# - name: Verify Pulled Image (Optional)
# run: |
# echo "Listing local Docker images..."
# docker images | grep "${{ env.IMAGE_NAME }}"
# echo "Image verification complete."
# - name: Run Docker Container (with TPU options if on TPU VM)
# run: |
# FULL_IMAGE_PATH="${{ env.GAR_LOCATION }}-docker.pkg.dev/${{ env.PROJECT_ID }}/${{ env.GAR_REPO }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}"
# echo "Running Docker container: $FULL_IMAGE_PATH"
# # IMPORTANT: Add --privileged and --network host ONLY IF this job is running
# # on your TPU VM self-hosted runner AND it has the necessary permissions.
# # If running on 'ubuntu-latest', these flags are not meaningful for TPU access.
# docker run --rm \
# --privileged \
# --network host \
# "$FULL_IMAGE_PATH" \
# /bin/bash -c " \
# echo 'Container is running...'; \
# # Add your test or verification commands here, e.g.:
# # python3 -c 'import jax; print(jax.default_backend())'; \
# # pytest your_tests.py; \
# echo 'Container execution finished.'; \
# "