Test other changes #3266
# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2024-2025, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author S. Siso, STFC Daresbury Lab
# This workflow uses a self-hosted runner to perform the more expensive
# NEMOv5 integration tests that are not run on GHA systems.
name: NEMOv5 Integration Tests
on:
  push
env:
  NVFORTRAN_VERSION: 25.1
  ONEAPI_VERSION: 2025.0
  GCC_VERSION: 14
  NUM_PARALLEL: 16
jobs:
  run_if_on_mirror:
    if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
    runs-on: self-hosted
    outputs:
      bench_gfortran_omp_cpu: ${{ steps.bench_gfortran_omp_cpu.outputs.time }}
      bench_nvfortran_omp_offload: ${{ steps.bench_nvfortran_omp_offload.outputs.time }}
      bench_nvfortran_omp_offload_build: ${{ steps.bench_nvfortran_omp_offload.outputs.build_time }}
      orca1_nvfortran_omp_offload: ${{ steps.orca1_nvfortran_omp_offload.outputs.time }}
      orca2_nvfortran_omp_offload: ${{ steps.orca2_nvfortran_omp_offload.outputs.time }}
      bench_nvfortran_omp_offload_async: ${{ steps.bench_nvfortran_omp_offload_async.outputs.time }}
      orca2_nvfortran_omp_offload_async: ${{ steps.orca2_nvfortran_omp_offload_async.outputs.time }}
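    # Each output above is populated by the step with the matching id, which
    # appends a "name=value" line to the file named by ${GITHUB_OUTPUT}.
    # Steps whose bodies are currently commented out leave their output empty.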
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
          # This is required to get the commit history for merge commits for
          # the ci-skip check below.
          fetch-depth: '0'
      - name: Check for [skip ci] in commit message
        uses: mstachniuk/ci-skip@v1
        with:
          # This setting causes the tests to 'fail' if [skip ci] is specified
          fail-fast: true
          commit-filter: '[skip ci]'
      - name: Install dependencies
        run: |
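          # NOTE: PYTHON_VERSION is not defined in the env block above; it is
          # assumed to be provided by the self-hosted runner's environment.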
          module load python/${PYTHON_VERSION}
          python -m venv .runner_venv
          source .runner_venv/bin/activate
          python -m pip install --upgrade pip
          # Uncomment the below to use the submodule version of fparser rather
          # than the latest release from pypi.
          # pip install external/fparser
          pip install .
      - name: Reset working directory
        run: |
          rm -rf /archive/psyclone-tests/latest-run/UKMO-NEMOv5
          cp -r /archive/psyclone-tests/nemo-inputs/UKMO-NEMOv5 \
                /archive/psyclone-tests/latest-run/UKMO-NEMOv5
      # PSyclone passthrough for 5.0-beta of NEMO.
      - name: NEMO 5.0 gfortran passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_GCC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          mpirun -np 4 ./nemo
          tail run.stat
          # The KGO was produced with gfortran, so we can do an exact diff
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
      - name: NEMO 5.0 nvidia passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_NVHPC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -nofma -Mnovect -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          ./nemo
          tail run.stat
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          export VAR_TIME=$(awk '/ step /{print $3}' timing.output | head -n 1 | sed -e 's/s//')
          echo "Time-stepping duration = " $VAR_TIME
      - name: NEMO 5.0 Intel passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%oneapi@${ONEAPI_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_ONEAPI
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -r8 -O2 -fp-model precise -fno-alias -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          mpirun -np 6 ./nemo
          tail run.stat
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.oneapi.small.10steps run.stat
      - name: NEMO 5.0 gfortran OpenMP for CPUs (BENCH)
        id: bench_gfortran_omp_cpu
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_OMP_THREADING_GCC
          # Set up FCM envvars to use psyclonefc and compile with OpenMP for CPU
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export PSYCLONE_COMPILER=$MPIF90
          export MPIF90=psyclonefc
          export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py"
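          # Pointing MPIF90 at psyclonefc makes FCM route every Fortran compile
          # through PSyclone's compiler wrapper: each source file is transformed
          # with the script given in PSYCLONE_OPTS before the wrapper invokes
          # the real compiler saved in PSYCLONE_COMPILER.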
| export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g -fopenmp" | |
| # Clean up and compile | |
| rm -rf tests/${TEST_DIR} | |
| ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1 | |
| # Run test | |
| cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 | |
| cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg | |
| OMP_NUM_THREADS=4 mpirun -np 1 ./nemo | |
| tail run.stat | |
| # TODO #3112: Fix differences with baseline result | |
| diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat | |
| cat timing.output | |
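          # "local proces" is a partial pattern, so the grep below matches
          # "process" as well as "processes" lines in timing.output.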
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (BENCH - managed memory)
        id: bench_nvfortran_omp_offload
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC
          # Make sure the profiling wrapper is compiled with the same compiler
          export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
          cd $PROFILING_DIR
          make clean
          F90=$MPIF90 make
          # First do a debug build: set the FCM environment variables to use flags and
          # intrinsics with numerically reproducible results and enable PROFILING hooks
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
          export ENABLE_PROFILING=1
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
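          # REPRODUCIBLE (like ASYNC_PARALLEL in later steps) is exported before
          # PSyclone runs; it is presumably read as an environment variable by
          # the transformation scripts in examples/nemo/scripts.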
          export PSYCLONE_COMPILER=$MPIF90
          export MPIF90=psyclonefc
          export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1
          # Run reproducible test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
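          # Restrict the run to GPU index 1 on the runner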
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # We can compare all digits for this build
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          # Now do a fast build (without reproducible or profiling options, which have a
          # big impact for BENCH due to some inner-loop REAL intrinsics)
          cd $NEMO_DIR
          unset REPRODUCIBLE
          unset ENABLE_PROFILING
          export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
          rm -rf tests/${TEST_DIR}
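          # ${SECONDS} is the shell's elapsed-seconds counter, so the
          # difference below measures the build wall-clock time.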
| export BUILD_START="${SECONDS}" | |
| ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1 | |
| export BUILD_ELAPSED=$((${SECONDS}-${BUILD_START})) | |
| # Run non-reproducible test | |
| cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 | |
| cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg | |
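          # NV_ACC_POOL_THRESHOLD caps the NVHPC runtime's pool allocator at
          # 75% of device memory (a tuning choice for this benchmark).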
          export NV_ACC_POOL_THRESHOLD=75
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          cat timing.output
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
          echo "build_time=${BUILD_ELAPSED}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA1 - managed memory)
        id: orca1_nvfortran_omp_offload
        run: |
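          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the orca1_nvfortran_omp_offload output
          # above remains empty.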
          # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC
          # # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # # Clean up and compile
          # rm -rf cfgs/${TEST_DIR}
          # ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run test
          # cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          # ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
          # # Make sure mpi has been built with cuda support
          # ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
          # # Run with round-robin allocation of GPUs to MPI ranks
          # mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat
          # export VAR_TIME=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA2 - managed memory)
        id: orca2_nvfortran_omp_offload
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
          # Clean up and compile
          rm -rf cfgs/${TEST_DIR}
          ./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
              add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
          # Uses both threading and offloading
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 ./nemo
          cat timing.output
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (BENCH - managed memory)
        id: bench_nvfortran_omp_offload_async
        run: |
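          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the bench_nvfortran_omp_offload_async
          # output above remains empty.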
          # # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC_ASYNC
          # # Make sure the profiling wrapper is compiled with the same compiler
          # export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
          # cd $PROFILING_DIR
          # make clean
          # F90=$MPIF90 make
          # # First do a debug build: set the environment variables to use flags and intrinsics
          # # with numerically reproducible results and enable PROFILING hooks
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
          # export ENABLE_PROFILING=1
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # export ASYNC_PARALLEL=1
          # # Clean up and compile
          # rm -rf tests/${TEST_DIR}
          # ./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run reproducible test
          # cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          # cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          # export CUDA_VISIBLE_DEVICES=1
          # OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # # We can compare all digits for this build
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          # # Now do a fast build (without reproducible or profiling options, which have a
          # # big impact for BENCH due to some inner-loop REAL intrinsics)
          # cd $NEMO_DIR
          # unset REPRODUCIBLE
          # unset ENABLE_PROFILING
          # export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
          # rm -rf tests/${TEST_DIR}
          # export NV_ACC_POOL_THRESHOLD=75
          # export CUDA_VISIBLE_DEVICES=1
          # ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run non-reproducible test
          # cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          # cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          # OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA2 - managed memory)
        id: orca2_nvfortran_omp_offload_async
        run: |
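          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the orca2_nvfortran_omp_offload_async
          # output above remains empty.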
          # # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC_ASYNC
          # # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # export ASYNC_PARALLEL=1
          # # Clean up and compile
          # rm -rf cfgs/${TEST_DIR}
          # ./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # # Run test
          # cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          # ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
          # # Uses both threading and offloading
          # export CUDA_VISIBLE_DEVICES=1
          # OMP_NUM_THREADS=4 ./nemo
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
          # export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA1 - managed memory)
        # Only do this Action if this is a re-run
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC_ASYNC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
          export ASYNC_PARALLEL=1
          # Clean up and compile
          rm -rf cfgs/${TEST_DIR}
          ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
              -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
          # Make sure mpi has been built with cuda support
          ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
          # Run with round-robin allocation of GPUs to MPI ranks
          mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
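          # The diff below is informational for this async build: "|| true"
          # stops a mismatch from failing the step. TIME_sec is likewise only
          # logged; this step has no id, so no output is exported.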
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat || true
          export TIME_sec=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
  upload_if_on_mirror:
    if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
    runs-on: ubuntu-latest
    needs: run_if_on_mirror
    steps:
      - name: Install mongosh
        run: |
          sudo apt-get install gnupg
          wget -qO- https://www.mongodb.org/static/pgp/server-8.0.asc | sudo tee /etc/apt/trusted.gpg.d/server-8.0.asc
          echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list
          sudo apt-get update
          sudo apt-get install -y mongodb-mongosh
      - name: Upload results
        run: |
          export COMMON_FIELDS='branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", date: new Date()'
          export COMMON_FIELDS=${COMMON_FIELDS}', github_job: "'"$GITHUB_RUN_ID"'-'"$GITHUB_RUN_ATTEMPT"'"'
          export COMMON_FIELDS=${COMMON_FIELDS}', system: "Rupert"'
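          # COMMON_FIELDS now expands to something like:
          #   branch_name: "master", commit: "<sha>", date: new Date(), github_job: "<run_id>-<attempt>", system: "Rupert"
          # Note that steps whose bodies are commented out leave their output
          # empty, which would make the corresponding elapsed_time field below
          # invalid JS.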
          mongosh \
            "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
            --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
            --password ${{ secrets.MONGODB_PASSWORD }} \
            --eval 'db.GitHub_CI.insertMany([
              {
                ci_test: "NEMOv5 OpenMP for CPU (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "gfortran-'"$GCC_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_gfortran_omp_cpu}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (BENCH) build time",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_build}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (ORCA1)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca1_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (ORCA2)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU Async (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_async}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU Async (ORCA2)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload_async}}"',
                '"$COMMON_FIELDS"'
              }])'