diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d13707..87e668a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,55 +9,139 @@ # Enbable debug mode by passing -DCMAKE_BUILD_TYPE=Debug to CMake, default is # Release -cmake_minimum_required (VERSION 3.11) +cmake_minimum_required (VERSION 3.24) -option(USE_GPU "user-set flag to compile in GPU code" FALSE) +project (BRANSON + VERSION 0.8 + DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition." +) -if(DEFINED ENV{CUDADIR} - OR DEFINED ENV{CUDACXX} - OR DEFINED ENV{CUDA_HOME}) - if(USE_GPU) - set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'") - enable_language(CUDA) - endif() -elseif( - "$ENV{LOADEDMODULES}" MATCHES "rocmcc" - OR DEFINED ENV{HIPCXX} - OR DEFINED CMAKE_HIP_COMPILER - OR DEFINED ENV{ROCM_PATH}) - if(USE_GPU) - set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'") - #if(CMAKE_HIP_COMPILER MATCHES "CC") - # set(CMAKE_HIP_FLAGS "-x hip") - #endif() - enable_language(HIP) +## Fail if someone tries to config an in-source build. +if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + message(FATAL_ERROR "In-source builds are not supported. 
Please remove " + "CMakeCache.txt from the 'src' dir and configure an " + "out-of-source build in another directory.") +endif() + +# Set the build type to Release by default if not set +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) +string(REPLACE " " ";" CMAKE_BRANSON_CXXFLAGS "${CMAKE_CXX_FLAGS}") + +option(ENABLE_CUDA "Use CUDA" FALSE) +option(ENABLE_HIP "Use HIP" FALSE) +option(ENABLE_CALIPER "Enable Caliper" FALSE) +option(ENABLE_OPENMP "Enable OpenMP" FALSE) +option(ENABLE_METIS "Enable METIS" FALSE) +option(ENABLE_VIZ "Enable VIZ" FALSE) + +set(CMAKE_VERBOSE_MAKEFILE ON) + +find_package(MPI REQUIRED) +list(APPEND branson_deps + MPI::MPI_CXX) + +############################################################################## +# OpenMP +############################################################################## +if(ENABLE_OPENMP) + message(STATUS "Looking for OpenMP...") + find_package(OpenMP REQUIRED) + if(OpenMP_FOUND) + message(STATUS "Looking for openmp......found ${OpenMP_CXX_FLAGS}") + set(USE_OPENMP ON) + list(APPEND branson_deps + OpenMP::OpenMP_CXX + ) endif() +else() + message(STATUS "Disabling OpenMP...") + set(USE_OPENMP OFF) endif() -message("GPU_DBS_STRING: ${GPU_DBS_STRING}") -project (BRANSON - VERSION 0.8 - DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition." 
-# HOMEPAGE URL "https://github.com/lanl/branson" # needs cmake 3.12+
-  LANGUAGES CXX C ${GPU_DBS_STRING})
-
-get_property(_LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES)
-
-message("Languages: ${_LANGUAGES_}")
-if((_LANGUAGES_ MATCHES CUDA OR _LANGUAGES_ MATCHES HIP) AND USE_GPU)
-  message("CUDA/HIP module found (CUDA/HIP environment variables set) and USE_GPU is on, making GPU BRANSON")
-  add_compile_definitions(HAS_GPU)
-elseif(USE_GPU)
-  message(FATAL_ERROR "CUDA/HIP module NOT found (CUDA/HIP environment variables set) but USE_GPU is on, reconfigure with USE_GPU off or fix modules")
-elseif(NOT USE_GPU)
-  message("GPU mode not requested, making CPU only BRANSON")
+if(ENABLE_CUDA)  # GPU backend selection: CUDA branch, chosen with -DENABLE_CUDA=ON
+  enable_language(CXX)
+
+  find_package(CUDAToolkit REQUIRED)  # supplies the CUDA::cudart imported target linked below
+  list(APPEND branson_deps
+    CUDA::cudart)
+
+  include(CheckLanguage)
+  check_language(CUDA)  # leaves CMAKE_CUDA_COMPILER unset when no usable CUDA compiler is found
+
+  set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})  # host side is built with the project's C++ compiler
+
+  if(NOT CMAKE_CUDA_COMPILER)  # fail here with a readable message instead of inside enable_language(CUDA)
+    message(FATAL_ERROR "Unable to find the nvcc compiler. Please use "  # message() joins its arguments with no separator, hence the trailing space
+      "CMAKE_CUDA_COMPILER to provide the nvcc compiler.")
+  endif()
+
+  enable_language(CUDA)
+  set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})  # keep the CUDA standard in step with the C++ standard
+  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+  set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH}")  # NOTE(review): no default for CUDA_ARCH is visible here - confirm it is always passed (e.g. -DCUDA_ARCH=80)
+  set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA)  # main.cc is compiled as CUDA source
+  set(CMAKE_CUDA_FLAGS "-g --expt-relaxed-constexpr ${CMAKE_CUDA_FLAGS}")
+  set(CMAKE_CUDA_FLAGS "-expt-extended-lambda ${CMAKE_CUDA_FLAGS}")
+  list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU)  # both GPU macros are defined for the sources
+  list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU)
+
+  set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'")
+
+  message("Making GPU(CUDA) BRANSON")
+
+elseif(ENABLE_HIP)  # GPU backend selection: HIP branch, chosen with -DENABLE_HIP=ON
+  enable_language(CXX)
+
+  if(NOT ROCM_PATH)  # no -DROCM_PATH given: fall back to the ROCM_PATH environment variable
+    if(DEFINED ENV{ROCM_PATH})
+      set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed")
+    else()
+      message(FATAL_ERROR "Unable to find rocm. 
Please use " + "ROCM_PATH to provide the rocm installation.") + endif() + endif() + + set(CMAKE_HIP_COMPILER "${ROCM_PATH}/llvm/bin/clang++") + list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib/cmake") + + find_package(hip REQUIRED) + list(APPEND branson_deps + hip::host + hip::device) + + include(CheckLanguage) + check_language(HIP) + + set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER}) + + enable_language(HIP) + set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD}) + set(CMAKE_HIP_STANDARD_REQUIRED ON) + set(CMAKE_HIP_ARCHITECTURES "${HIP_ARCH}") + set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP) + set(CMAKE_HIP_FLAGS "-g ${CMAKE_HIP_FLAGS}") + list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU) + list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU) + + set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'") + + message("Making GPU(HIP) BRANSON") + else() - message("CUDA/HIP module not found and GPU mode not requested, making CPU only BRANSON") + message("Making CPU BRANSON") + endif() +message("GPU_DBS_STRING: ${GPU_DBS_STRING}") # Build system support files are located here. -set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config ) +set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config ${CMAKE_MODULE_PATH}) message( " This is ${CMAKE_PROJECT_NAME} v. ${PROJECT_VERSION}. 
@@ -88,24 +172,6 @@ string( REGEX REPLACE "([A-z0-9]+).*" "\\1" SITENAME ${SITENAME} ) include (lanl-setup) set( SITENAME ${SITENAME} CACHE "STRING" "Name of the current machine" FORCE) -#------------------------------------------------------------------------------# -# Setup compiler options -set( CXX_STANDARD_REQUIRED ON ) -set( CMAKE_CXX_STANDARD 17 ) - -# Do not enable extensions (e.g.: --std=gnu++11) -set( CMAKE_CXX_EXTENSIONS OFF ) -set( CMAKE_C_EXTENSIONS OFF ) - -#------------------------------------------------------------------------------# -# Build type and custom compiler flags - -if ( "${CMAKE_BUILD_TYPE}notset" STREQUAL "notset" AND - NOT DEFINED CMAKE_CONFIGURATION_TYPES ) - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release|Debug|RelWithDebInfo" - FORCE) -endif () - # GCC options for address or undefined sanitizing #set(GCC_SANITIZE_COMPILE_FLAGS "-fsanitize=address") #set(GCC_SANITIZE_LINK_FLAGS "-fsanitize=address") @@ -136,14 +202,17 @@ elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Cray" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -DR123_USE_GNU_UINT128=0") endif() +if(ENABLE_CUDA) + string(JOIN " " CMAKE_BRANSON_CUDAFLAGS ${CMAKE_BRANSON_CXXFLAGS}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${CMAKE_BRANSON_CUDAFLAGS}") +else() + add_compile_options(${CMAKE_BRANSON_CXXFLAGS}) +endif() #------------------------------------------------------------------------------# # Look for Third Party Libraries (metis, etc.) 
add_subdirectory(pugixml) -include(find_tpls) -setupTPLs() - #------------------------------------------------------------------------------# # Report build configuration @@ -166,83 +235,82 @@ else() endif() message( STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") message( STATUS "Compiler : ${CMAKE_CXX_COMPILER}") +string( REPLACE ";" " " CMAKE_BRANSON_CXXFLAGS_STR "${CMAKE_BRANSON_CXXFLAGS}") if( CMAKE_CONFIGURATION_TYPES ) - message( STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS}") + message( STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS} ${CMAKE_BRANSON_CXXFLAGS_STR}") message( STATUS "Compiler Flags (Debug) : ${CMAKE_CXX_FLAGS_DEBUG}") message( STATUS "Compiler Flags (Release): ${CMAKE_CXX_FLAGS_RELEASE}") else() string( TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER ) - message( STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") + message( STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_BRANSON_CXXFLAGS_STR} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") endif() message("\n") -#------------------------------------------------------------------------------# -# Set up libraries for Cray RMA routines - -# set(DMAPP_DYNAMIC -Wl,--whole-archive,-ldmapp,--no-whole-archive) - - -#------------------------------------------------------------------------------# -# Targets -file(GLOB headers *.h) -add_executable(BRANSON main.cc ${headers}) -target_include_directories( BRANSON PRIVATE - $ ${PROJECT_SOURCE_DIR}/pugixml/src/ ${PROJECT_SOURCE_DIR}/random123/ ) -set( branson_deps - MPI::MPI_CXX - MPI::MPI_C - pugixml) - -if(OpenMP_FOUND) - set(branson_deps "OpenMP::OpenMP_CXX;${branson_deps}") -endif() - -if(METIS_FOUND) - set( branson_deps "METIS::metis;${branson_deps}") -endif() - -if( VIZ_LIBRARIES_FOUND ) - set( branson_deps "Silo::silo;${HDF5_LIBRARIES};${branson_deps}" ) -endif() +list(APPEND branson_deps pugixml) + +############################################################################## +# caliper/adiak 
+############################################################################## +if(ENABLE_CALIPER) + message(STATUS "Looking for caliper...") + find_package(caliper REQUIRED) + if(caliper_FOUND) + message(STATUS "Looking for caliper......found ${caliper_DIR}") + list(APPEND branson_deps + caliper) + endif() -if(caliper_FOUND) - set( branson_deps "caliper;${branson_deps}") + message(STATUS "Looking for adiak...") + find_package(adiak REQUIRED) + if(adiak_FOUND) + message(STATUS "Looking for adiak......found ${adiak_DIR}") + list(APPEND branson_deps + adiak::adiak) + endif() +else() + message(STATUS "Disabling caliper...") endif() -#------------------------------------------------------------------------------# -# Generate config.h - -configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h) -#------------------------------------------------------------------------------# - -if("${GPU_DBS_STRING}" STREQUAL "CUDA" ) - message("Setting CUDA compiler options") - #set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "70") # V100 - set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "80") # A100 - set_target_properties(BRANSON PROPERTIES CUDA_STANDARD 17) - string(APPEND CMAKE_CUDA_FLAGS " -g --expt-relaxed-constexpr") - string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda" ) - set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA) -elseif("${GPU_DBS_STRING}" STREQUAL "HIP" ) - message("Setting HIP compiler options") - if(CMAKE_HIP_COMPILER MATCHES "CC") - set(CMAKE_HIP_FLAGS "-x hip") +############################################################################## +# metis +############################################################################## +if(ENABLE_METIS) + message(STATUS "Looking for METIS...") + find_package(METIS REQUIRED) + if(METIS_FOUND) + message(STATUS "Looking for METIS......found ${METIS_LIBRARIES}") + list(APPEND branson_deps + METIS::metis) endif() - set_target_properties(BRANSON PROPERTIES HIP_ARCHITECTURES "gfx942") 
# MI300 - set_target_properties(BRANSON PROPERTIES HIP_STANDARD 17) - string(APPEND CMAKE_HIP_FLAGS " -g -mllvm=--disable-peephole") - #string(APPEND CMAKE_HIP_FLAGS " --expt-extended-lambda" ) - set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP) else() - message("GPU Options: Not a GPU build or GPU_DBS_STRING not recognized") + message(STATUS "Disabling METIS...") endif() -target_link_libraries( BRANSON PRIVATE ${branson_deps} ) - -#------------------------------------------------------------------------------# -# Testing +############################################################################## +# Silo and HDF5 libraries +############################################################################## +if(ENABLE_VIZ) + message(STATUS "Looking for HDF5...") + find_package(HDF5 REQUIRED) + if(HDF5_FOUND) + list(GET HDF5_LIBRARIES 0 hdf5lib) + message(STATUS "Looking for HDF5..found ${hdf5lib}") + unset(hdf5lib) + message(STATUS "Looking for Silo...") + find_package(Silo REQUIRED) + if(Silo_FOUND) + message(STATUS "Looking for Silo..found ${Silo_LIBRARY}") + endif() + list(APPEND branson_deps + Silo::silo + ${HDF5_LIBRARIES} + ) + endif() +else () + message(STATUS "Disabling visualization libraries...") +endif() -option( BUILD_TESTING "Should we compile the tests?" ON ) +option(BUILD_TESTING "Should we compile the tests?" 
ON) if(BUILD_TESTING) enable_testing() add_subdirectory(test) @@ -253,11 +321,24 @@ else() "Building tests disabled, set BUILD_TESTING=TRUE or don't set BUILD_TESTING to enable test builds") endif() +#------------------------------------------------------------------------------# +# Generate config.h + +configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h) +#------------------------------------------------------------------------------# + +#------------------------------------------------------------------------------# +# Targets +file(GLOB headers *.h) +add_executable(BRANSON main.cc ${headers}) +target_link_libraries(BRANSON PUBLIC ${branson_deps}) +target_include_directories(BRANSON PRIVATE + ${CMAKE_BINARY_DIR} ${PROJECT_SOURCE_DIR}/pugixml/src/ ${HIP_INCLUDE_DIRS}) #------------------------------------------------------------------------------# # Targets for installation -install(TARGETS BRANSON DESTINATION bin) +install(TARGETS BRANSON DESTINATION ${CMAKE_INSTALL_BINDIR}) #------------------------------------------------------------------------------# # End src/CMakeLists.txt diff --git a/src/config/find_tpls.cmake b/src/config/find_tpls.cmake deleted file mode 100644 index 94d120f..0000000 --- a/src/config/find_tpls.cmake +++ /dev/null @@ -1,637 +0,0 @@ -#-----------------------------*-cmake-*----------------------------------------# -# file src/config/find_tpls.cmake -# author Kelly Thompson -# date Tuesday, Aug 14, 2018, 15:24 pm -# brief Look for third party libraries like metis -# note Copyright (C) 2018 Los Alamos National Security, LLC. -# All rights reserved. 
-#------------------------------------------------------------------------------# - -include( FeatureSummary ) - -# ------------------------------------------------------------------------------------------------ # -# Query CPU topology -# -# Returns: -# -# * MPI_CORES_PER_CPU -# * MPI_CPUS_PER_NODE -# * MPI_PHYSICAL_CORES -# * MPI_MAX_NUMPROCS_PHYSICAL -# * MPI_HYPERTHREADING -# -# See also: -# -# * Try running 'lstopo' for a graphical view of the local topology or 'lscpu' for a text version. -# * EAP's flags can be found in Test.rh/General/run_job.pl (look for $other_args). In particular, -# it may be useful to examine EAP's options for srun or aprun. -# ------------------------------------------------------------------------------------------------ -# # -macro(query_topology) - - # These cmake commands, while useful, don't provide the topology detail that we are interested in - # (i.e. number of sockets per node). We could use the results of these queries to know if - # hyper-threading is enabled (if logical != physical cores) - # - # * cmake_host_system_information(RESULT MPI_PHYSICAL_CORES QUERY NUMBER_OF_PHYSICAL_CORES) - # * cmake_host_system_information(RESULT MPI_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) - - # start with default values - set(MPI_CORES_PER_CPU 4) - set(MPI_PHYSICAL_CORES 1) - - if(SITENAME MATCHES "RZNevada" - OR SITENAME MATCHES "RZVernal" - OR SITENAME MATCHES "RZAdams") - set(MPI_CORES_PER_CPU 2) - set(MPI_PHYSICAL_CORES 64) - set(MPIEXEC_MAX_NUMPROCS - 64 - CACHE STRING "Max procs on node." FORCE) - elseif(SITENAME MATCHES "RZAdams") - set(MPI_CORES_PER_CPU 2) - set(MPI_PHYSICAL_CORES 96) - set(MPIEXEC_MAX_NUMPROCS - 96 - CACHE STRING "Max procs on node." FORCE) - elseif(EXISTS "/proc/cpuinfo") - # read the system's cpuinfo... 
- file(READ "/proc/cpuinfo" cpuinfo_data) - string(REGEX REPLACE "\n" ";" cpuinfo_data "${cpuinfo_data}") - foreach(line ${cpuinfo_data}) - if("${line}" MATCHES "cpu cores") - string(REGEX REPLACE ".* ([0-9]+).*" "\\1" MPI_CORES_PER_CPU "${line}") - elseif("${line}" MATCHES "physical id") - string(REGEX REPLACE ".* ([0-9]+).*" "\\1" tmp "${line}") - if(${tmp} GREATER ${MPI_PHYSICAL_CORES}) - set(MPI_PHYSICAL_CORES ${tmp}) - endif() - endif() - endforeach() - # correct 0-based indexing - math(EXPR MPI_PHYSICAL_CORES "${MPI_PHYSICAL_CORES} + 1") - endif() - - math(EXPR MPI_CPUS_PER_NODE "${MPIEXEC_MAX_NUMPROCS} / ${MPI_CORES_PER_CPU}") - set(MPI_CPUS_PER_NODE - ${MPI_CPUS_PER_NODE} - CACHE STRING "Number of multi-core CPUs per node" FORCE) - set(MPI_CORES_PER_CPU - ${MPI_CORES_PER_CPU} - CACHE STRING "Number of cores per cpu" FORCE) - - # - # Check for hyper-threading - This is important for reserving threads for OpenMP tests... - # - math(EXPR MPI_MAX_NUMPROCS_PHYSICAL "${MPI_PHYSICAL_CORES} * ${MPI_CORES_PER_CPU}") - if("${MPI_MAX_NUMPROCS_PHYSICAL}" STREQUAL "${MPIEXEC_MAX_NUMPROCS}") - set(MPI_HYPERTHREADING - "OFF" - CACHE BOOL "Are we using hyper-threading?" FORCE) - else() - set(MPI_HYPERTHREADING - "ON" - CACHE BOOL "Are we using hyper-threading?" 
FORCE) - endif() -endmacro() - -# cmake-lint: disable=R0912,R0915,W0106 -# -# * too many branches -# * function too long - -# ------------------------------------------------------------------------------------------------ # -# Set MPI flavor and vendor version -# -# Returns (as cache variables) -# -# * MPI_VERSION -# * MPI_FLAVOR = {openmpi, mpich, cray, spectrum, mvapich2, intel} -# -# ------------------------------------------------------------------------------------------------ # -function(setMPIflavorVer) - - # First attempt to determine MPI flavor -- scape flavor from full path (this ususally works for - # HPC or systems with modules) - if(CMAKE_CXX_COMPILER_WRAPPER STREQUAL CrayPrgEnv OR "$ENV{LOADEDMODULES}" MATCHES "cray-mpich") - set(MPI_FLAVOR "cray") - elseif( - "${MPIEXEC_EXECUTABLE}" MATCHES "openmpi" - OR "${MPIEXEC_EXECUTABLE}" MATCHES "smpi" - OR ("${MPIEXEC_EXECUTABLE}" MATCHES "srun" AND "${MPI_C_COMPILER}" MATCHES "openmpi")) - set(MPI_FLAVOR "openmpi") - elseif("${MPIEXEC_EXECUTABLE}" MATCHES "mpich" OR "${MPI_C_HEADER_DIR}" MATCHES "mpich") - set(MPI_FLAVOR "mpich") - elseif("${MPIEXEC_EXECUTABLE}" MATCHES "impi" OR "${MPIEXEC_EXECUTABLE}" MATCHES "clusterstudio") - set(MPI_FLAVOR "intel") - elseif("${MPIEXEC_EXECUTABLE}" MATCHES "mvapich2") - set(MPI_FLAVOR "mvapich2") - elseif( - "${MPIEXEC_EXECUTABLE}" MATCHES "spectrum-mpi" - OR "${MPIEXEC_EXECUTABLE}" MATCHES "lrun" - OR "${MPIEXEC_EXECUTABLE}" MATCHES "jsrun") - set(MPI_FLAVOR "spectrum") - endif() - - if(CMAKE_CXX_COMPILER_WRAPPER STREQUAL CrayPrgEnv) - if(DEFINED ENV{CRAY_MPICH2_VER}) - set(MPI_VERSION $ENV{CRAY_MPICH2_VER}) - endif() - elseif(DEFINED ENV{LMOD_MPI_VERSION}) - # Toss3 with srun - string(REGEX REPLACE "-[a-z0-9]+" "" MPI_VERSION "$ENV{LMOD_MPI_VERSION}") - elseif(DEFINED ENV{LMOD_FAMILY_MPI_VERSION}) - # ATS-2 with lrun - string(REGEX REPLACE "-[a-z0-9]+" "" MPI_VERSION "$ENV{LMOD_FAMILY_MPI_VERSION}") - else() - execute_process( - COMMAND ${MPIEXEC_EXECUTABLE} 
--version - OUTPUT_VARIABLE DBS_MPI_VER_OUT - ERROR_VARIABLE DBS_MPI_VER_ERR) - set(DBS_MPI_VER "${DBS_MPI_VER_OUT}${DBS_MPI_VER_ERR}") - string(REPLACE "\n" ";" TEMP ${DBS_MPI_VER}) - foreach(line ${TEMP}) - # extract the version... - if(${line} MATCHES "Version" - OR ${line} MATCHES "OpenRTE" - OR ${line} MATCHES "Open MPI" - OR ${line} MATCHES "Spectrum MPI") - set(DBS_MPI_VER "${line}") - if("${DBS_MPI_VER}" MATCHES "[0-9]+[.][0-9]+[.][0-9]+") - string(REGEX REPLACE ".*([0-9]+)[.]([0-9]+)[.]([0-9]+).*" "\\1" DBS_MPI_VER_MAJOR - ${DBS_MPI_VER}) - string(REGEX REPLACE ".*([0-9]+)[.]([0-9]+)[.]([0-9]+).*" "\\2" DBS_MPI_VER_MINOR - ${DBS_MPI_VER}) - string(REGEX REPLACE ".*([0-9]+)[.]([0-9]+)[.]([0-9]+).*" "\\3" DBS_MPI_VER_PATCH - ${DBS_MPI_VER}) - set(MPI_VERSION "${DBS_MPI_VER_MAJOR}.${DBS_MPI_VER_MINOR}.${DBS_MPI_VER_PATCH}") - elseif("${DBS_MPI_VER}" MATCHES "[0-9]+[.][0-9]+") - string(REGEX REPLACE ".*([0-9]+)[.]([0-9]+).*" "\\1" DBS_MPI_VER_MAJOR ${DBS_MPI_VER}) - string(REGEX REPLACE ".*([0-9]+)[.]([0-9]+).*" "\\2" DBS_MPI_VER_MINOR ${DBS_MPI_VER}) - set(MPI_VERSION "${DBS_MPI_VER_MAJOR}.${DBS_MPI_VER_MINOR}") - endif() - endif() - - # if needed, make a 2nd pass at identifying the MPI flavor - if(NOT DEFINED MPI_FLAVOR) - if("${line}" MATCHES "HYDRA") - set(MPI_FLAVOR "mpich") - elseif("${line}" MATCHES "OpenRTE") - set(MPI_FLAVOR "openmpi") - elseif("${line}" MATCHES "intel-mpi" OR "${line}" MATCHES "Intel[(]R[)] MPI Library") - set(MPI_FLAVOR "intel") - endif() - endif() - - # Once we have the needed information stop parsing... - if(DEFINED MPI_FLAVOR AND DEFINED MPI_VERSION) - break() - endif() - endforeach() - - endif() - - # if the FindMPI.cmake module didn't set the version, then try to do so here. 
- if(NOT DEFINED MPI_VERSION AND DEFINED MPI_C_VERSION) - set(MPI_VERSION ${MPI_C_VERSION}) - endif() - - # Return the discovered values to the calling scope - if(DEFINED MPI_FLAVOR) - set(MPI_FLAVOR - "${MPI_FLAVOR}" - CACHE STRING "Vendor brand of MPI" FORCE) - endif() - if(DEFINED MPI_VERSION) - set(MPI_VERSION - "${MPI_VERSION}" - CACHE STRING "Vendor version of MPI" FORCE) - endif() - -endfunction() - -# ------------------------------------------------------------------------------------------------ # -# Setup OpenMPI -# ------------------------------------------------------------------------------------------------ # -macro(setupOpenMPI) - - # sanity check, these OpenMPI flags (below) require version >= 1.8 - if(MPI_VERSION VERSION_LESS 1.8) - message(FATAL_ERROR "OpenMPI version < 1.8 found.") - endif() - - # Find cores/cpu, cpu/node, hyper-threading - query_topology() - - # Extra options provided from the environment or by cmake - if(DEFINED ENV{MPIEXEC_PREFLAGS}) - set(MPIEXEC_PREFLAGS "$ENV{MPIEXEC_PREFLAGS}") - endif() - - if("${MPIEXEC_EXECUTABLE}" MATCHES "srun") - set(preflags " --overlap") # -N 1 --cpu_bind=verbose,cores - set(MPIEXEC_PREFLAGS ${preflags}) - set(MPIEXEC_PREFLAGS_PERFBENCH ${preflags}) - set(MPIEXEC_OMP_PREFLAGS "${MPIEXEC_PREFLAGS} -c ${MPI_CORES_PER_CPU}") - else() - # Notes: - # - # * For PERFBENCH that use Quo, we need '--map-by socket:SPAN' instead of '-bind-to none'. The - # 'bind-to none' is required to pack a node. - # * Adding '--debug-daemons' is often requested by the OpenMPI dev team in conjunction with - # 'export OMPI_MCA_btl_base_verbose=100' to obtain debug traces from openmpi. 
- set(MPIEXEC_PREFLAGS_PERFBENCH "${MPIEXEC_PREFLAGS} --map-by socket:SPAN") - if(NOT MPIEXEC_PREFLAGS MATCHES " -bind-to none") - string(APPEND MPIEXEC_PREFLAGS " -bind-to none") - endif() - # Setup for OMP plus MPI - if((NOT APPLE) - AND (NOT MPIEXEC_OMP_PREFLAGS MATCHES "--map-by ppr") - AND (MPI_VERSION VERSION_LESS 5.0)) - # -bind-to fails on OSX, See #691 OpenMPI version 5.0+ doesn't use this ppr syntax. - set(MPIEXEC_OMP_PREFLAGS - "${MPIEXEC_PREFLAGS} --map-by ppr:${MPI_CORES_PER_CPU}:socket --report-bindings") - endif() - - # Spectrum-MPI on darwin - # - # Limit communication to on-node via '-intra sm' or 'intra vader' - # https://www.ibm.com/support/knowledgecenter/SSZTET_EOS/eos/guide_101.pdf - if("${MPIEXEC_EXECUTABLE}" MATCHES "smpi" AND NOT MPIEXEC_PREFLAGS MATCHES "-intra sm") - string(REPLACE "-bind-to none" "-bind-to core" MPIEXEC_PREFLAGS ${MPIEXEC_PREFLAGS}) - # string(REPLACE "-bind-to none" "-bind-to core" MPIEXEC_OMP_PREFLAGS ${MPIEXEC_OMP_PREFLAGS}) - set(smpi-sm-only "-intra sm -aff off --report-bindings") - string(APPEND MPIEXEC_PREFLAGS " ${smpi-sm-only}") - string(APPEND MPIEXEC_OMP_PREFLAGS " ${smpi-sm-only}") - unset(smpi-sm-only) - endif() - endif() - - # Cache the result - set(MPIEXEC_PREFLAGS - "${MPIEXEC_PREFLAGS}" - CACHE STRING "extra mpirun flags (list)." FORCE) - set(MPIEXEC_PREFLAGS_PERFBENCH - "${MPIEXEC_PREFLAGS_PERFBENCH}" - CACHE STRING "extra mpirun flags (list)." FORCE) - set(MPIEXEC_OMP_PREFLAGS - "${MPIEXEC_OMP_PREFLAGS}" - CACHE STRING "extra mpirun flags (list)." 
FORCE) - - mark_as_advanced(MPI_CPUS_PER_NODE MPI_CORES_PER_CPU MPI_PHYSICAL_CORES MPI_MAX_NUMPROCS_PHYSICAL - MPI_HYPERTHREADING) - -endmacro() - -macro(setupTPLs) - - ############################################################################## - # MPI - ############################################################################## - if( NOT TARGET MPI::MPI_C ) - #message(STATUS "Looking for MPI...") - #find_package(MPI QUIET REQUIRED) - #if( ${MPI_C_FOUND} ) - # message(STATUS "Looking for MPI...${MPIEXEC}") - #else() - # message(STATUS "Looking for MPI...not found") - #endif() - - #set_package_properties( MPI PROPERTIES - # URL "http://www.open-mpi.org/" - # DESCRIPTION "A High Performance Message Passing Library" - # TYPE REQUIRED - # PURPOSE "A parallel communication library is required in BRANSON.") - message(STATUS "Looking for MPI...") - - # Preserve data that may already be set. - if(DEFINED ENV{MPIRUN}) - set(MPIEXEC_EXECUTABLE - $ENV{MPIRUN} - CACHE STRING "Program to execute MPI parallel programs.") - elseif(DEFINED ENV{MPIEXEC_EXECUTABLE}) - set(MPIEXEC_EXECUTABLE - $ENV{MPIEXEC_EXECUTABLE} - CACHE STRING "Program to execute MPI parallel programs.") - elseif(DEFINED ENV{MPIEXEC}) - set(MPIEXEC_EXECUTABLE - $ENV{MPIEXEC} - CACHE STRING "Program to execute MPI parallel programs.") - endif() - - # If this is a Cray system and the Cray MPI compile wrappers are used, or if this is CTS-1 with - # Toss3, then do some special setup: - - if(CMAKE_CXX_COMPILER_WRAPPER MATCHES CrayPrgEnv OR IS_DIRECTORY "/usr/projects/hpcsoft/toss3/") - if(NOT EXISTS ${MPIEXEC_EXECUTABLE}) - find_program(MPIEXEC_EXECUTABLE flux) # 1st option is flux - if(EXISTS ${MPIEXEC_EXECUTABLE}) - execute_process( - COMMAND ${MPIEXEC_EXECUTABLE} jobs - RESULT_VARIABLE fluxfailure - OUTPUT_QUIET ERROR_QUIET) - if(NOT "${fluxfailure}" STREQUAL "0") - unset(MPIEXEC_EXECUTABLE CACHE) - endif() - endif() - if(NOT EXISTS ${MPIEXEC_EXECUTABLE}) - find_program(MPIEXEC_EXECUTABLE srun) # fall back 
to srun - endif() - endif() - if(MPIEXEC_EXECUTABLE MATCHES "flux") - set(MPIEXEC_NUMPROC_FLAG run;-n) - else() - set(MPIEXEC_NUMPROC_FLAG "-n") - endif() - set(MPIEXEC_NUMPROC_FLAG - "${MPIEXEC_NUMPROC_FLAG}" - CACHE STRING "mpirun flag used to specify the number of processors to use") - set(MPIEXEC_EXECUTABLE - ${MPIEXEC_EXECUTABLE} - CACHE STRING "Program to execute MPI parallel programs." FORCE) - - elseif(DEFINED ENV{SYS_TYPE} AND "$ENV{SYS_TYPE}" MATCHES "ppc64le_ib") # ATS-2 - if(NOT EXISTS ${MPIEXEC_EXECUTABLE}) - find_program(MPIEXEC_EXECUTABLE lrun) - endif() - set(MPIEXEC_EXECUTABLE - ${MPIEXEC_EXECUTABLE} - CACHE STRING "Program to execute MPI parallel programs." FORCE) - set(MPIEXEC_NUMPROC_FLAG - "--np" - CACHE STRING "mpirun flag used to specify the number of processors to use" FORCE) - endif() - - # Call the standard CMake FindMPI macro. - find_package(MPI QUIET) - - # Try to discover the MPI flavor and the vendor version. Returns MPI_VERSION, MPI_FLAVOR as - # cache variables - setmpiflavorver() - - # Set additional flags, environments that are MPI vendor specific. - if("${MPI_FLAVOR}" MATCHES "openmpi") - setupopenmpi() - elseif("${MPI_FLAVOR}" MATCHES "mpich") - setupmpichmpi() - elseif("${MPI_FLAVOR}" MATCHES "intel") - setupintelmpi() - elseif("${MPI_FLAVOR}" MATCHES "spectrum") - setupspectrummpi() - elseif("${MPI_FLAVOR}" MATCHES "cray") - setupCrayMPI() - else() - message( - FATAL_ERROR - " -The Draco build system doesn't know how to configure the build for - MPIEXEC_EXECUTABLE = ${MPIEXEC_EXECUTABLE} - DBS_MPI_VER = ${DBS_MPI_VER} - CMAKE_CXX_COMPILER_WRAPPER = ${CMAKE_CXX_COMPILER_WRAPPER}") - endif() - - # Mark some of the variables created by the above logic as 'advanced' so that they do not show - # up in the 'simple' ccmake view. 
- mark_as_advanced(MPI_EXTRA_LIBRARY MPI_LIBRARY) - - message(STATUS "Looking for MPI.......found ${MPIEXEC_EXECUTABLE}") - - # Sanity Checks for DRACO_C4==MPI - if("${MPI_CORES_PER_CPU}x" STREQUAL "x") - message(FATAL_ERROR "setupMPILibrariesUnix:: MPI_CORES_PER_CPU " "is not set!") - endif() - - set_package_properties( - MPI PROPERTIES - URL "http://www.open-mpi.org/" - DESCRIPTION "A High Performance Message Passing Library" - TYPE RECOMMENDED - PURPOSE "If not available, all Draco components will be built as scalar applications.") - - mark_as_advanced(MPIEXEC_OMP_PREFLAGS MPI_LIBRARIES) - - endif() - - ############################################################################## - # OpenMP - ############################################################################## - message(STATUS "Looking for Threads...") - find_package(Threads QUIET) - if(Threads_FOUND) - message(STATUS "Looking for Threads...found") - else() - message(STATUS "Looking for Threads...not found") - endif() - - message(STATUS "Looking for OpenMP...") - if(DEFINED USE_OPENMP) - # no-op (use defined value, -DUSE_OPENMP=, instead of attempting to guess) - else() - # Assume we want to use it if it is found. - set(USE_OPENMP ON) - endif() - set(USE_OPENMP - ${USE_OPENMP} - CACHE BOOL "Enable OpenMP threading support if detected." FORCE) - - # Find package if desired: - if(USE_OPENMP) - find_package(OpenMP QUIET) - else() - set(OpenMP_FOUND FALSE) - endif() - - if(OpenMP_FOUND) - # [2022-10-27 KT] cmake/3.22 doesn't report OpenMP_C_VERSION for nvc++. Fake it for now. - if("${OpenMP_C_VERSION}x" STREQUAL "x" AND CMAKE_CXX_COMPILER_ID MATCHES "NVHPC") - set(OpenMP_C_VERSION - "5.0" - CACHE BOOL "OpenMP version." FORCE) - set(OpenMP_FOUND TRUE) - endif() - message(STATUS "Looking for OpenMP... ${OpenMP_C_FLAGS} (supporting the ${OpenMP_C_VERSION} " - "standard)") - if(OpenMP_C_VERSION VERSION_LESS 3.0) - message(STATUS "OpenMP standard support is too old (< 3.0). 
Disabling OpenMP build features.") - set(OpenMP_FOUND FALSE) - set(OpenMP_C_FLAGS - "" - CACHE BOOL "OpenMP disabled (too old)." FORCE) - endif() - set(OpenMP_FOUND - ${OpenMP_FOUND} - CACHE BOOL "Is OpenMP available?" FORCE) - else() - if(USE_OPENMP) - # Not detected, though desired. - message(STATUS "Looking for OpenMP... not found") - else() - # Detected, but not desired. - message(STATUS "Looking for OpenMP... found, but disabled for this build") - endif() - endif() - message("CXX OPEN MP FLAGS: ${OpenMP_CXX_FLAGS}") - - ############################################################################## - # Caliper - ############################################################################## - if( NOT TARGET caliper ) - #============================================================================= - # If the user has provided ``CALIPER_ROOT_DIR``, use it! Choose items found - # at this location over system locations. - if( EXISTS "$ENV{CALIPER_ROOT_DIR}" ) - file( TO_CMAKE_PATH "$ENV{CALIPER_ROOT_DIR}" CALIPER_ROOT_DIR ) - set( CALIPER_ROOT_DIR "${CALIPER_ROOT_DIR}" CACHE PATH - "Prefix for Caliper installation." ) - endif() - - message( STATUS "Looking for caliper..." ) - find_package( caliper QUIET) - if( caliper_FOUND ) - message( STATUS "Looking for caliper.....found ${CALIPER_LIBRARY}" ) - else() - message( STATUS "Looking for caliper.....not found" ) - endif() - - set_package_properties( caliper PROPERTIES - DESCRIPTION "CALIPER" - TYPE OPTIONAL - URL "https://software.llnl.gov/Caliper/" - PURPOSE "Code instrumentation for performance analysis" - ) - - endif() - - ############################################################################## - # metis - # Load modules for metis to get correct environment variables - ############################################################################## - if( NOT TARGET METIS::metis ) - - message( STATUS "Looking for METIS..." 
) - find_package( METIS QUIET) - if( METIS_FOUND ) - message( STATUS "Looking for METIS.....found ${METIS_LIBRARY}" ) - else() - message( STATUS "Looking for METIS.....not found" ) - endif() - - set_package_properties( METIS PROPERTIES - DESCRIPTION "METIS" - TYPE OPTIONAL - URL "http://glaros.dtc.umn.edu/gkhome/metis/metis/overview" - PURPOSE "METIS is a set of serial programs for partitioning graphs, - partitioning finite element meshes, and producing fill reducing orderings for - sparse matrices.") - - endif() - - ############################################################################## - # Silo and HDF5 libraries - # Load modules for hdf5 and solo to get correct environment variables - # use find package - ############################################################################## - - if( NOT HDF5_FOUND ) - - message( STATUS "Looking for HDF5..." ) - find_package( HDF5 QUIET ) - if( HDF5_FOUND ) - list(GET HDF5_LIBRARIES 0 hdf5lib) - message( STATUS "Looking for HDF5..found ${hdf5lib}" ) - unset(hdf5lib) - else() - message( STATUS "Looking for HDF5..not found" ) - endif() - - set_package_properties( HDF5 PROPERTIES - DESCRIPTION "HDF5 is a data model, library, and file format for storing - and managing data. It supports an unlimited variety of datatypes, and is - designed for flexible and efficient I/O and for high volume and complex - data." - TYPE OPTIONAL - URL "https://support.hdfgroup.org/HDF5/" - PURPOSE "Provides optional visualization support for Branson." ) - - endif() - - if( HDF5_FOUND AND NOT TARGET Silo::silo ) - - message( STATUS "Looking for Silo..." ) - find_package( Silo QUIET ) - if( Silo_FOUND ) - message( STATUS "Looking for Silo..found ${Silo_LIBRARY}" ) - else() - message( STATUS "Looking for Silo..not found" ) - endif() - - set_package_properties( Silo PROPERTIES - DESCRIPTION "Silo is a library for reading and writing a wide variety of - scientific data to binary, disk files." 
- TYPE OPTIONAL - URL "http://wci.llnl.gov/simulation/computer-codes/silo" - PURPOSE "Provides optional visualization support for Branson.") - - endif() - - if (HDF5_FOUND AND Silo_FOUND) - set(VIZ_LIBRARIES_FOUND TRUE) - else () - message(STATUS "Optional visualization libraries not loaded...skipping") - endif () - -endmacro() - -# ------------------------------------------------------------------------------------------------ # -# Setup Cray MPI and wrappers -# ------------------------------------------------------------------------------------------------ # -macro(setupCrayMPI) - - query_topology() - - # salloc/sbatch options: - # - # * -N limit job to a single node. - # * --gres=craynetwork:0 This option allows more than one srun to be running at the same time on - # the Cray. There are 4 gres “tokens” available. If unspecified, each srun invocation will - # consume all of them. Setting the value to 0 means consume none and allow the user to run as - # many concurrent jobs as there are cores available on the node. This should only be specified - # on the salloc/sbatch command. Gabe doesn't recommend this option for regression testing. - # * --vm-overcommit=disable|enable Do not allow overcommit of heap resources. - # * -p knl Limit allocation to KNL nodes. - # - # srun options: - # - # * --cpu_bind=verbose,cores Bind MPI ranks to cores and print a summary of binding when run - # * --exclusive This option will keep concurrent jobs from running on the same cores. If you want - # to background tasks to have them run simultaneously, this option is required to be set or they - # will stomp on the same cores. - # * --overlap Allow steps to overlap each other on the CPUs. By default steps do not share CPUs - # with other parallel steps. 
- # * --hint=nomultithread Disable use of hyperthreads (use only physical cores) - - set(preflags " ") # -N 1 --cpu_bind=verbose,cores - if(NOT MPIEXEC_EXECUTABLE MATCHES "flux") - if(DEFINED ENV{PE_PRODUCT_LIST} AND "$ENV{PE_PRODUCT_LIST}" MATCHES "CRAYPE_X86_SPR") - # ATS-3 - string(APPEND preflags " --overlap --cpu-bind=none --hint=nomultithread") - else() - string(APPEND preflags " --gres=craynetwork:0 --overlap") - endif() - endif() - set(MPIEXEC_PREFLAGS - ${preflags} - CACHE STRING "extra mpirun flags (list)." FORCE) - # consider adding '-m=cyclic' - set(MPIEXEC_PREFLAGS_PERFBENCH - ${preflags} - CACHE STRING "extra mpirun flags (list)." FORCE) - set(MPIEXEC_OMP_PREFLAGS - "${MPIEXEC_PREFLAGS} -c ${MPI_CORES_PER_CPU}" - CACHE STRING "extra mpirun flags (list)." FORCE) -endmacro() - - -#------------------------------------------------------------------------------# -# End find_tpls.cmake -#------------------------------------------------------------------------------# diff --git a/src/history_based_transport.h b/src/history_based_transport.h index 5a0a535..bc0e970 100644 --- a/src/history_based_transport.h +++ b/src/history_based_transport.h @@ -387,7 +387,7 @@ void gpu_transport_photons(const uint32_t rank_cell_offset, Constants::n_threads_per_block; auto sync_error = cudaDeviceSynchronize(); - if(!sync_error) std::cout<<"ERROR: DeviceSynchronize"<DeviceSynchronize (pre-kernel): " << sync_error << std::endl; std::cout << "Launching with " << n_blocks << " blocks and "; std::cout << n_batch_photons << " photons" << std::endl; @@ -396,7 +396,7 @@ void gpu_transport_photons(const uint32_t rank_cell_offset, Insist(!(cudaGetLastError()), "CUDA error in transport kernel launch"); sync_error = cudaDeviceSynchronize(); - if(!sync_error) std::cout<<"ERROR: DeviceSynchronize"<DeviceSynchronize (post-kernel): " << sync_error << std::endl; // copy particles back to host err = cudaMemcpy(cpu_photons.data(), device_photons_ptr, n_batch_photons * sizeof(Photon), diff --git 
a/src/main.cc b/src/main.cc index 5560725..bde7579 100644 --- a/src/main.cc +++ b/src/main.cc @@ -108,10 +108,14 @@ int main(int argc, char **argv) { if (input.get_dd_mode() == PARTICLE_PASS) { if( input.get_particle_storage() == AOS) { + wrapped_cali_mark_begin("particle pass soa"); imc_particle_pass_driver(mesh, imc_state, imc_p, mpi_types, mpi_info); + wrapped_cali_mark_end("particle pass soa"); } else if(input.get_particle_storage() == SOA) { + wrapped_cali_mark_begin("particle pass aos"); imc_particle_pass_driver>(mesh, imc_state, imc_p, mpi_types, mpi_info); + wrapped_cali_mark_end("particle pass aos"); } else { cout << "Driver for array currently not supported" << endl; @@ -120,10 +124,14 @@ int main(int argc, char **argv) { } else if (input.get_dd_mode() == REPLICATED) { if( input.get_particle_storage() == SOA) { + wrapped_cali_mark_begin("replicated soa"); imc_replicated_driver(mesh, imc_state, imc_p, mpi_types, mpi_info); + wrapped_cali_mark_end("replicated soa"); } else if(input.get_particle_storage() == AOS) { + wrapped_cali_mark_begin("replicated aos"); imc_replicated_driver>(mesh, imc_state, imc_p, mpi_types, mpi_info); + wrapped_cali_mark_end("replicated aos"); } else { cout << "Driver for array currently not supported" << endl; diff --git a/src/replicated_transport.h b/src/replicated_transport.h index b7e55dd..e031b99 100644 --- a/src/replicated_transport.h +++ b/src/replicated_transport.h @@ -75,8 +75,12 @@ Census_T replicated_transport( if(gpu_setup.use_gpu_transporter() && gpu_available ) { if constexpr(std::is_same_v>) { t_transport.start_timer("gpu transport"); + wrapped_cali_mark_begin("gpu transport photons"); gpu_transport_photons(rank_cell_offset, all_photons, gpu_setup.get_device_cells_ptr(), cell_tallies); + wrapped_cali_mark_end("gpu transport photons"); + wrapped_cali_mark_begin("post process photons"); auto batch_complete = post_process_photons(next_dt, all_photons, census_list, census_E, exit_E); + wrapped_cali_mark_end("post process 
photons"); t_transport.stop_timer("gpu transport"); std::cout<<"gpu transport time: "<