Test other changes #3266
# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2024-2025, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author S. Siso, STFC Daresbury Lab
# This workflow uses a self-hosted runner to perform the more expensive
# NEMOv5 integration tests that are not run on GHA systems.
name: NEMOv5 Integration Tests
on:
  push
env:
  NVFORTRAN_VERSION: 25.1
  ONEAPI_VERSION: 2025.0
  GCC_VERSION: 14
  NUM_PARALLEL: 16
jobs:
  run_if_on_mirror:
    if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
    runs-on: self-hosted
    outputs:
      bench_gfortran_omp_cpu: ${{ steps.bench_gfortran_omp_cpu.outputs.time }}
      bench_nvfortran_omp_offload: ${{ steps.bench_nvfortran_omp_offload.outputs.time }}
      bench_nvfortran_omp_offload_build: ${{ steps.bench_nvfortran_omp_offload.outputs.build_time }}
      orca1_nvfortran_omp_offload: ${{ steps.orca1_nvfortran_omp_offload.outputs.time }}
      orca2_nvfortran_omp_offload: ${{ steps.orca2_nvfortran_omp_offload.outputs.time }}
      bench_nvfortran_omp_offload_async: ${{ steps.bench_nvfortran_omp_offload_async.outputs.time }}
      orca2_nvfortran_omp_offload_async: ${{ steps.orca2_nvfortran_omp_offload_async.outputs.time }}
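    # Each output above is populated by the step with the matching id, which
    # appends a "name=value" line to the file named by ${GITHUB_OUTPUT}.
    # Steps whose bodies are currently commented out leave their output empty.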
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
          # This is required to get the commit history for merge commits for
          # the ci-skip check below.
          fetch-depth: '0'
      - name: Check for [skip ci] in commit message
        uses: mstachniuk/ci-skip@v1
        with:
          # This setting causes the tests to 'fail' if [skip ci] is specified
          fail-fast: true
          commit-filter: '[skip ci]'
      - name: Install dependencies
        run: |
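          # NOTE: PYTHON_VERSION is not defined in the env block above; it is
          # assumed to be provided by the self-hosted runner's environment.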
          module load python/${PYTHON_VERSION}
          python -m venv .runner_venv
          source .runner_venv/bin/activate
          python -m pip install --upgrade pip
          # Uncomment the below to use the submodule version of fparser rather
          # than the latest release from pypi.
          # pip install external/fparser
          pip install .
      - name: Reset working directory
        run: |
          rm -rf /archive/psyclone-tests/latest-run/UKMO-NEMOv5
          cp -r /archive/psyclone-tests/nemo-inputs/UKMO-NEMOv5 \
                /archive/psyclone-tests/latest-run/UKMO-NEMOv5
      # PSyclone passthrough for 5.0-beta of NEMO.
      - name: NEMO 5.0 gfortran passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_GCC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          mpirun -np 4 ./nemo
          tail run.stat
          # The KGO was produced with gfortran, so we can do an exact diff
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat
      - name: NEMO 5.0 nvidia passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_NVHPC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -nofma -Mnovect -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          ./nemo
          tail run.stat
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          export VAR_TIME=$(awk '/ step /{print $3}' timing.output | head -n 1 | sed -e 's/s//')
          echo "Time-stepping duration = " $VAR_TIME
      - name: NEMO 5.0 Intel passthrough
        # Only bother doing passthrough if this is a re-run of a previous test.
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%oneapi@${ONEAPI_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_PASSTHROUGH_ONEAPI
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -r8 -O2 -fp-model precise -fno-alias -g"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/passthrough.py \
              add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          mpirun -np 6 ./nemo
          tail run.stat
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.oneapi.small.10steps run.stat
      - name: NEMO 5.0 gfortran OpenMP for CPUs (BENCH)
        id: bench_gfortran_omp_cpu
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%gcc@${GCC_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_OMP_THREADING_GCC
          # Set up FCM envvars to use psyclonefc and compile with OpenMP for CPU
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export PSYCLONE_COMPILER=$MPIF90
          export MPIF90=psyclonefc
          export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py"
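          # Pointing MPIF90 at psyclonefc makes FCM route every Fortran compile
          # through PSyclone's compiler wrapper: each source file is transformed
          # with the script given in PSYCLONE_OPTS before the wrapper invokes
          # the real compiler saved in PSYCLONE_COMPILER.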
| export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g -fopenmp" | |
| # Clean up and compile | |
| rm -rf tests/${TEST_DIR} | |
| ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} add_key "key_nosignedzero" -j ${NUM_PARALLEL} -v 1 | |
| # Run test | |
| cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 | |
| cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg | |
| OMP_NUM_THREADS=4 mpirun -np 1 ./nemo | |
| tail run.stat | |
| # TODO #3112: Fix differences with baseline result | |
| diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.gfortran.small.10steps run.stat | |
| cat timing.output | |
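          # "local proces" is a partial pattern, so the grep below matches
          # "process" as well as "processes" lines in timing.output.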
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (BENCH - managed memory)
        id: bench_nvfortran_omp_offload
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC
          # Make sure the profiling wrapper is compiled with the same compiler
          export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
          cd $PROFILING_DIR
          make clean
          F90=$MPIF90 make
          # First do a debug build: set the FCM environment variables to use flags and
          # intrinsics with numerically reproducible results and enable PROFILING hooks
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
          export ENABLE_PROFILING=1
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
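          # REPRODUCIBLE (like ASYNC_PARALLEL in later steps) is exported before
          # PSyclone runs; it is presumably read as an environment variable by
          # the transformation scripts in examples/nemo/scripts.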
          export PSYCLONE_COMPILER=$MPIF90
          export MPIF90=psyclonefc
          export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py"
          # Clean up and compile
          rm -rf tests/${TEST_DIR}
          ./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1
          # Run reproducible test
          cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
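          # Restrict the run to GPU index 1 on the runner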
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # We can compare all digits for this build
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          # Now do a fast build (without reproducible or profiling options, which have a
          # big impact for BENCH due to some inner-loop REAL intrinsics)
          cd $NEMO_DIR
          unset REPRODUCIBLE
          unset ENABLE_PROFILING
          export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
          rm -rf tests/${TEST_DIR}
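          # ${SECONDS} is the shell's elapsed-seconds counter, so the
          # difference below measures the build wall-clock time.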
| export BUILD_START="${SECONDS}" | |
| ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1 | |
| export BUILD_ELAPSED=$((${SECONDS}-${BUILD_START})) | |
| # Run non-reproducible test | |
| cd $NEMO_DIR/tests/${TEST_DIR}/EXP00 | |
| cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg | |
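          # NV_ACC_POOL_THRESHOLD caps the NVHPC runtime's pool allocator at
          # 75% of device memory (a tuning choice for this benchmark).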
          export NV_ACC_POOL_THRESHOLD=75
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          cat timing.output
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
          echo "build_time=${BUILD_ELAPSED}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA1 - managed memory)
        id: orca1_nvfortran_omp_offload
        run: |
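          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the orca1_nvfortran_omp_offload output
          # above remains empty.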
          # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC
          # # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # # Clean up and compile
          # rm -rf cfgs/${TEST_DIR}
          # ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run test
          # cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          # ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
          # # Make sure mpi has been built with cuda support
          # ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
          # # Run with round-robin allocation of GPUs to MPI ranks
          # mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat
          # export VAR_TIME=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA2 - managed memory)
        id: orca2_nvfortran_omp_offload
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
          # Clean up and compile
          rm -rf cfgs/${TEST_DIR}
          ./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
              add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
          # Uses both threading and offloading
          export CUDA_VISIBLE_DEVICES=1
          OMP_NUM_THREADS=4 ./nemo
          cat timing.output
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
          export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (BENCH - managed memory)
        id: bench_nvfortran_omp_offload_async
        run: |
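          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the bench_nvfortran_omp_offload_async
          # output above remains empty.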
          # # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=BENCH_OMP_OFFLOAD_NVHPC_ASYNC
          # # Make sure the profiling wrapper is compiled with the same compiler
          # export PROFILING_DIR=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
          # cd $PROFILING_DIR
          # make clean
          # F90=$MPIF90 make
          # # First do a debug build: set the environment variables to use flags and intrinsics
          # # with numerically reproducible results and enable PROFILING hooks
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack_profile.fcm arch/arch-linux_spack_profile.fcm
          # export ENABLE_PROFILING=1
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # export ASYNC_PARALLEL=1
          # # Clean up and compile
          # rm -rf tests/${TEST_DIR}
          # ./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run reproducible test
          # cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          # cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          # export CUDA_VISIBLE_DEVICES=1
          # OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # # We can compare all digits for this build
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.bench.nvhpc.small.10steps run.stat
          # # Now do a fast build (without reproducible or profiling options, which have a
          # # big impact for BENCH due to some inner-loop REAL intrinsics)
          # cd $NEMO_DIR
          # unset REPRODUCIBLE
          # unset ENABLE_PROFILING
          # export FCFLAGS="-i4 -Mr8 -O3 -mp=gpu -gpu=mem:managed"
          # rm -rf tests/${TEST_DIR}
          # export NV_ACC_POOL_THRESHOLD=75
          # export CUDA_VISIBLE_DEVICES=1
          # ./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     -j ${NUM_PARALLEL} -v 1
          # # Run non-reproducible test
          # cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
          # cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
          # OMP_NUM_THREADS=4 mpirun -np 1 ./nemo
          # export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA2 - managed memory)
        id: orca2_nvfortran_omp_offload_async
        run: |
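          # NOTE: the body of this step is entirely commented out, so it
          # currently runs nothing and the orca2_nvfortran_omp_offload_async
          # output above remains empty.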
          # # Set up environment
          # source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          # spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          # source .runner_venv/bin/activate
          # export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          # export PSYCLONE_HOME=${PWD}/.runner_venv
          # export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          # export TEST_DIR=ORCA2_OMP_OFFLOAD_NVHPC_ASYNC
          # # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          # cd $NEMO_DIR
          # cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          # export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          # export REPRODUCIBLE=1
          # export ASYNC_PARALLEL=1
          # # Clean up and compile
          # rm -rf cfgs/${TEST_DIR}
          # ./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
          #     add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1
          # # Run test
          # cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          # ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA2/* .
          # # Uses both threading and offloading
          # export CUDA_VISIBLE_DEVICES=1
          # OMP_NUM_THREADS=4 ./nemo
          # diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca2.nvhpc.10steps run.stat
          # export VAR_TIME=$(grep "local proces" timing.output | head -n 1 | awk '{print $4}' | tr -d s)
          # echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"
      - name: NEMO 5.0 nvidia Async OpenMP for GPUs (UKMO ORCA1 - managed memory)
        # Only do this Action if this is a re-run
        if: ${{ github.run_attempt != '1' }}
        run: |
          # Set up environment
          source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
          spack unload && spack load nemo-build-environment%nvhpc@${NVFORTRAN_VERSION}
          source .runner_venv/bin/activate
          export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
          export PSYCLONE_HOME=${PWD}/.runner_venv
          export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
          export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC_ASYNC
          # Set up FCM: PATHs are loaded from Spack; we only need to set the FCFLAGS
          # We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
          cd $NEMO_DIR
          cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
          export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
          export REPRODUCIBLE=1
          export ASYNC_PARALLEL=1
          # Clean up and compile
          rm -rf cfgs/${TEST_DIR}
          ./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
              -j ${NUM_PARALLEL} -v 1
          # Run test
          cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
          ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1/* .
          # Make sure mpi has been built with cuda support
          ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
          # Run with round-robin allocation of GPUs to MPI ranks
          mpirun -n 2 sh -c 'CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK ./nemo'
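          # The diff below is informational for this async build: "|| true"
          # stops a mismatch from failing the step. TIME_sec is likewise only
          # logged; this step has no id, so no output is exported.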
          diff $PSYCLONE_NEMO_DIR/KGOs/run.stat.orca1.nvhpc.10steps run.stat || true
          export TIME_sec=$(grep "local MPI proces" timing.output | head -n 1 | awk '{print $5}' | tr -d s)
  upload_if_on_mirror:
    if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
    runs-on: ubuntu-latest
    needs: run_if_on_mirror
    steps:
      - name: Install mongosh
        run: |
          sudo apt-get install gnupg
          wget -qO- https://www.mongodb.org/static/pgp/server-8.0.asc | sudo tee /etc/apt/trusted.gpg.d/server-8.0.asc
          echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu noble/mongodb-org/8.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-8.0.list
          sudo apt-get update
          sudo apt-get install -y mongodb-mongosh
      - name: Upload results
        run: |
          export COMMON_FIELDS='branch_name: "'"$GITHUB_REF_NAME"'", commit: "'"$GITHUB_SHA"'", date: new Date()'
          export COMMON_FIELDS=${COMMON_FIELDS}', github_job: "'"$GITHUB_RUN_ID"'-'"$GITHUB_RUN_ATTEMPT"'"'
          export COMMON_FIELDS=${COMMON_FIELDS}', system: "Rupert"'
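          # COMMON_FIELDS now expands to something like:
          #   branch_name: "master", commit: "<sha>", date: new Date(), github_job: "<run_id>-<attempt>", system: "Rupert"
          # Note that steps whose bodies are commented out leave their output
          # empty, which would make the corresponding elapsed_time field below
          # invalid JS.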
          mongosh \
            "mongodb+srv://cluster0.x8ncpxi.mongodb.net/PerformanceMonitoring" \
            --quiet --apiVersion 1 --username ${{ secrets.MONGODB_USERNAME }} \
            --password ${{ secrets.MONGODB_PASSWORD }} \
            --eval 'db.GitHub_CI.insertMany([
              {
                ci_test: "NEMOv5 OpenMP for CPU (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "gfortran-'"$GCC_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_gfortran_omp_cpu}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (BENCH) build time",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_build}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (ORCA1)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca1_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU (ORCA2)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU Async (BENCH)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.bench_nvfortran_omp_offload_async}}"',
                '"$COMMON_FIELDS"'
              },
              {
                ci_test: "NEMOv5 OpenMP for GPU Async (ORCA2)",
                nemo_version: "NEMO 5.0-RC MO patch",
                compiler: "nvhpc-'"$NVFORTRAN_VERSION"'",
                elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca2_nvfortran_omp_offload_async}}"',
                '"$COMMON_FIELDS"'
              }])'