
# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2024-2025, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author S. Siso, STFC Daresbury Lab
# This workflow will use a self-hosted runner to perform the more expensive
# NEMOv5 integration tests that are not run on GHA systems.
name: NEMOv5 Integration Tests
on:
push
env:
NVFORTRAN_VERSION: 25.1
ONEAPI_VERSION: 2025.0
GCC_VERSION: 14
NUM_PARALLEL: 16
jobs:
run_if_on_mirror:
if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
runs-on: self-hosted
outputs:
bench_gfortran_omp_cpu: ${{ steps.bench_gfortran_omp_cpu.outputs.time }}
bench_nvfortran_omp_offload: ${{ steps.bench_nvfortran_omp_offload.outputs.time }}
bench_nvfortran_omp_offload_build: ${{ steps.bench_nvfortran_omp_offload.outputs.build_time }}
orca1_nvfortran_omp_offload: ${{ steps.orca1_nvfortran_omp_offload.outputs.time }}
orca2_nvfortran_omp_offload: ${{ steps.orca2_nvfortran_omp_offload.outputs.time }}
bench_nvfortran_omp_offload_async: ${{ steps.bench_nvfortran_omp_offload_async.outputs.time }}
orca2_nvfortran_omp_offload_async: ${{ steps.orca2_nvfortran_omp_offload_async.outputs.time }}
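# These job-level outputs are consumed by the 'upload_if_on_mirror' job below,
# which uploads the timings to the performance-monitoring database.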
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
# This is required to get the commit history for merge commits for
# the ci-skip check below.
fetch-depth: '0'
- name: Check for [skip ci] in commit message
uses: mstachniuk/ci-skip@v1
with:
# This setting causes the tests to 'fail' if [skip ci] is specified
fail-fast: true
commit-filter: '[skip ci]'
- name: Install dependencies
run: |
module load python/${PYTHON_VERSION}
python -m venv .runner_venv
source .runner_venv/bin/activate
python -m pip install --upgrade pip
# Uncomment the below to use the submodule version of fparser rather
# than the latest release from pypi.
# pip install external/fparser
pip install .
- name: Reset working directory
run: |
rm -rf /archive/psyclone-tests/latest-run/UKMO-NEMOv5
cp -r /archive/psyclone-tests/nemo-inputs/UKMO-NEMOv5 \
/archive/psyclone-tests/latest-run/UKMO-NEMOv5
# PSyclone passthrough for 5.0-beta of NEMO.
- name: NEMO 5.0 gfortran passthrough
# Only bother doing passthrough if this is a re-run of a previous test.
if: ${{ github.run_attempt != '1' }}
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=BENCH_PASSTHROUGH_GCC
# Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
mpirun -np 4 ./nemo
tail run.stat
# This was produced with gfortran, so we can do an exact diff
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
- name: NEMO 5.0 nvidia passthrough
# Only bother doing passthrough if this is a re-run of a previous test.
if: ${{ github.run_attempt != '1' }}
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=BENCH_PASSTHROUGH_NVHPC
# Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -Mr8 -O2 -nofma -Mnovect -g"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
-j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
./nemo
tail run.stat
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
export VAR_TIME=$(awk '/ step /{print $3}' timing.output | head -n 1 | sed -e 's/s//')
echo "Time-stepping duration = " $VAR_TIME
- name: NEMO 5.0 Intel passthrough
# Only bother doing passthrough if this is a re-run of a previous test.
if: ${{ github.run_attempt != '1' }}
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%oneapi@${ONEAPI_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=BENCH_PASSTHROUGH_ONEAPI
# Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -r8 -O2 -fp-model precise -fno-alias -g"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
mpirun -np 6 ./nemo
tail run.stat
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.oneapi.small.10steps run.stat
- name: NEMO 5.0 gfortran OpenMP for CPUs (BENCH)
id: bench_gfortran_omp_cpu
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=BENCH_OMP_THREADING_GCC
# Set up FCM envvars to use psyclonefc and compile with OpenMP for CPU
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py"
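# With these settings FCM invokes 'psyclonefc' in place of the real compiler:
# psyclonefc applies PSyclone (driven by PSYCLONE_OPTS, here the omp_cpu_trans.py
# transformation script) to each Fortran source before handing the transformed
# code to the compiler saved in PSYCLONE_COMPILER.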
export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g -fopenmp"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
tail run.stat
# TODO #3112: Fix differences with baseline result
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
cat timing.output
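# Extract the elapsed time (in seconds) reported for the local process and
# expose it as the step output read by the upload job.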
export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia OpenMP for GPUs (BENCH - managed memory)
id: bench_nvfortran_omp_offload
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC
# Make sure the profiling wrapper is compiled with the same compiler
export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
cd $PROFILING_DIR
make clean
F90=$MPIF90 make
# First do a debug build: set the FCM environment variables to use flags and intrinsics
# with numerically reproducible results and enable PROFILING hooks
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
export ENABLE_PROFILING=1
# We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export REPRODUCIBLE=1
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1
# Run reproducible test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
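# Run on the runner's second GPU (device index 1)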
export CUDA_VISIBLE_DEVICES=1
OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
# We can compare all digits for this build
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
# Now do a fast-build (without reproducible or profiling options, which have a
# big impact for BENCH due to some inner-loop REAL intrinsics)
cd $NEMO_DIR
unset REPRODUCIBLE
unset ENABLE_PROFILING
export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
rm -rf tests/${TEST_DIR}
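# Time the build with bash's SECONDS counter (elapsed wall-clock seconds)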
export BUILD_START="${SECONDS}"
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1
export BUILD_ELAPSED=$((${SECONDS}-${BUILD_START}))
# Run non-reproducible test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
export NV_ACC_POOL_THRESHOLD=75
export CUDA_VISIBLE_DEVICES=1
OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
cat timing.output
export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
echo "build_time=${BUILD_ELAPSED}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA1 - managed memory)
id: orca1_nvfortran_omp_offload
run: |
# Set up environment
# source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
# spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
# source .runner_venv/bin/activate
# export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
# export PSYCLONE_HOME=${PWD}/.runner_venv
# export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
# export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC
# # Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
# # We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
# cd $NEMO_DIR
# cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
# export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
# export REPRODUCIBLE=1
# # Clean up and compile
# rm -rf cfgs/${TEST_DIR}
# ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
# -j ${NUM_PARALLEL} -v 1
# # Run test
# cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
# ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
# # Make sure MPI has been built with CUDA support
# ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
# # Run with round robin allocations of GPUs to MPI ranks
# mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
# diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat
# export VAR_TIME=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
# echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA2 - managed memory)
id: orca2_nvfortran_omp_offload
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC
# Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
# We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export REPRODUCIBLE=1
# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
# Uses both threading and offloading
export CUDA_VISIBLE_DEVICES=1
OMP_NUM_THREADS=4 ./nemo
cat timing.output
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia Async OpenMP for GPUs (BENCH - managed memory)
id: bench_nvfortran_omp_offload_async
run: |
# # Set up environment
# source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
# spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
# source .runner_venv/bin/activate
# export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
# export PSYCLONE_HOME=${PWD}/.runner_venv
# export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
# export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC_ASYNC
# # Make sure the profiling wrapper is compiled with the same compiler
# export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
# cd $PROFILING_DIR
# make clean
# F90=$MPIF90 make
# # First do a debug build: set the environment variables to use flags and intrinsics
# # with numerically reproducible results and enable PROFILING hooks
# cd $NEMO_DIR
# cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
# export ENABLE_PROFILING=1
# # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
# export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
# export REPRODUCIBLE=1
# export ASYNC_PARALLEL=1
# # Clean up and compile
# rm -rf tests/${TEST_DIR}
# ./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
# -j ${NUM_PARALLEL} -v 1
# # Run reproducible test
# cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
# cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
# export CUDA_VISIBLE_DEVICES=1
# OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
# # We can compare all digits for this build
# diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
# # Now do a fast-build (without reproducible or profiling options, which have a
# # big impact for BENCH due to some inner-loop REAL intrinsics)
# cd $NEMO_DIR
# unset REPRODUCIBLE
# unset ENABLE_PROFILING
# export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
# rm -rf tests/${TEST_DIR}
# export NV_ACC_POOL_THRESHOLD=75
# export CUDA_VISIBLE_DEVICES=1
# ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
# -j ${NUM_PARALLEL} -v 1
# # Run non-reproducible test
# cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
# cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
# OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
# export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
# echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA2 - managed memory)
id: orca2_nvfortran_omp_offload_async
run: |
# # Set up environment
# source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
# spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
# source .runner_venv/bin/activate
# export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
# export PSYCLONE_HOME=${PWD}/.runner_venv
# export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
# export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC_ASYNC
# # Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
# # We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
# cd $NEMO_DIR
# cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
# export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
# export REPRODUCIBLE=1
# export ASYNC_PARALLEL=1
# # Clean up and compile
# rm -rf cfgs/${TEST_DIR}
# ./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
# add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
# # Run test
# cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
# ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
# # Uses both threading and offloading
# export CUDA_VISIBLE_DEVICES=1
# OMP_NUM_THREADS=4 ./nemo
# diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
# export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
# echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
- name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA1 - managed memory)
# Only run this step if this is a re-run
if: ${{ github.run_attempt != '1' }}
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
source .runner_venv/bin/activate
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC_ASYNC
# Set up FCM: PATHs are loaded from SPACK; we only need to set the FCFLAGS
# We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export REPRODUCIBLE=1
export ASYNC_PARALLEL=1
# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
-j ${NUM_PARALLEL} -v 1
# Run test
cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
# Make sure MPI has been built with CUDA support
ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
# Run with round robin allocations of GPUs to MPI ranks
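# (Open MPI exports OMPI_COMM_WORLD_LOCAL_RANK for each rank, so rank 0 uses GPU 0 and rank 1 uses GPU 1)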
mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat || true
export TIME_sec=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
upload_if_on_mirror:
if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
runs-on: ubuntu-latest
needs: run_if_on_mirror
steps:
- name: Install mongosh
run: |
sudo apt-get install gnupg
wget -qO- https://www.mongodb.org/static/pgp/server-8.0.asc | sudo tee /etc/apt/trusted.gpg.d/server-8.0.asc
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list
sudo apt-get update
sudo apt-get install -y mongodb-mongosh
- name: Upload results
run: |
export COMMON_FIELDS='branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", date: new Date()'
export COMMON_FIELDS=${COMMON_FIELDS}', github_job: "'"$GITHUB_RUN_ID"'-'"$GITHUB_RUN_ATTEMPT"'"'
export COMMON_FIELDS=${COMMON_FIELDS}', system: "Rupert"'
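# COMMON_FIELDS is a fragment of the JS documents inserted below, e.g.:
#   branch_name: "<branch>", commit: "<sha>", date: new Date(), github_job: "<run_id>-<attempt>", system: "Rupert"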
mongosh \
"mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
--quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
--password ${{ secrets.MONGODB_PASSWORD }} \
--eval 'db.GitHub_CI.insertMany([
{
ci_test: "NEMOv5 OpenMP for CPU (BENCH)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"gfortran-'"$GCC_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_gfortran_omp_cpu}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU (BENCH)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU (BENCH) build time",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_build}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU (ORCA1)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca1_nvfortran_omp_offload}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU (ORCA2)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU Async (BENCH)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_async}}"',
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU Async (ORCA2)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload_async}}"',
'"$COMMON_FIELDS"'
}])'