diff --git a/.buildkite/distributed/pipeline.yml b/.buildkite/distributed/pipeline.yml index bbb9b12446..0d61c54340 100644 --- a/.buildkite/distributed/pipeline.yml +++ b/.buildkite/distributed/pipeline.yml @@ -29,7 +29,7 @@ steps: - wait - - label: "🐉 cpu distributed unit tests" + - label: "🐉 cpu unit tests" key: "distributed_cpu" env: TEST_GROUP: "distributed" @@ -44,7 +44,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🐲 gpu distributed unit tests" + - label: "🐲 gpu unit tests" key: "distributed_gpu" env: TEST_GROUP: "distributed" @@ -60,7 +60,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🦾 cpu distributed solvers tests" + - label: "🦾 cpu solvers tests" key: "distributed_solvers_cpu" env: TEST_GROUP: "distributed_solvers" @@ -74,7 +74,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🛸 gpu distributed solvers tests" + - label: "🛸 gpu solvers tests" key: "distributed_solvers_gpu" env: TEST_GROUP: "distributed_solvers" @@ -90,7 +90,40 @@ steps: - exit_status: 1 limit: 1 - - label: "🤺 cpu distributed hydrostatic model tests" + - label: "🤿 cpu hydrostatic regression tests" + key: "distributed_hydrostatic_regression_cpu" + env: + TEST_GROUP: "distributed_hydrostatic_regression" + TEST_ARCHITECTURE: "CPU" + commands: + - "srun julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + timeout_in_minutes: 1440 + agents: + slurm_mem: 50G + slurm_ntasks: 4 + retry: + automatic: + - exit_status: 1 + limit: 1 + + - label: "🥽 gpu hydrostatic regression tests" + key: "distributed_hydrostatic_regression_gpu" + env: + TEST_GROUP: "distributed_hydrostatic_regression" + TEST_ARCHITECTURE: "GPU" + commands: + - "srun julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + timeout_in_minutes: 1440 + agents: + slurm_mem: 100G # Apparently the GPU tests require more memory + slurm_ntasks: 4 + slurm_gpus_per_task: 1 + retry: + automatic: + - exit_status: 1 + limit: 1 + + - label: "🤺 cpu hydrostatic model tests" key: "distributed_hydrostatic_model_cpu" env: TEST_GROUP: 
"distributed_hydrostatic_model" @@ -106,7 +139,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🦏 gpu distributed hydrostatic model tests" + - label: "🦏 gpu hydrostatic model tests" key: "distributed_hydrostatic_model_gpu" env: TEST_GROUP: "distributed_hydrostatic_model" @@ -123,7 +156,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🦍 cpu distributed nonhydrostatic regression" + - label: "🦍 cpu nonhydrostatic regression" key: "distributed_nonhydrostatic_regression_cpu" env: TEST_GROUP: "distributed_nonhydrostatic_regression" @@ -138,7 +171,7 @@ steps: - exit_status: 1 limit: 1 - - label: "🕺 gpu distributed nonhydrostatic regression" + - label: "🕺 gpu nonhydrostatic regression" key: "distributed_nonhydrostatic_regression_gpu" env: TEST_GROUP: "distributed_nonhydrostatic_regression" diff --git a/src/DistributedComputations/communication_buffers.jl b/src/DistributedComputations/communication_buffers.jl index 4334c112c2..1c9862406d 100644 --- a/src/DistributedComputations/communication_buffers.jl +++ b/src/DistributedComputations/communication_buffers.jl @@ -1,7 +1,7 @@ using Oceananigans.BoundaryConditions using Oceananigans.BoundaryConditions: FieldBoundaryConditions, BoundaryCondition using Oceananigans.BoundaryConditions: MultiRegionCommunication, DistributedCommunication -using Oceananigans.Grids: halo_size, size +using Oceananigans.Grids: halo_size, size, topology using Oceananigans.Utils: launch! 
using Adapt @@ -25,8 +25,51 @@ struct CommunicationBuffers{W, E, S, N, SW, SE, NW, NE} northeast :: NE end +Adapt.adapt_structure(to, buff::CommunicationBuffers) = + CommunicationBuffers(Adapt.adapt(to, buff.west), + Adapt.adapt(to, buff.east), + Adapt.adapt(to, buff.north), + Adapt.adapt(to, buff.south), + Adapt.adapt(to, buff.southwest), + Adapt.adapt(to, buff.southeast), + Adapt.adapt(to, buff.northwest), + Adapt.adapt(to, buff.northeast)) + +on_architecture(arch, buff::CommunicationBuffers) = + CommunicationBuffers(on_architecture(arch, buff.west), + on_architecture(arch, buff.east), + on_architecture(arch, buff.north), + on_architecture(arch, buff.south), + on_architecture(arch, buff.southwest), + on_architecture(arch, buff.southeast), + on_architecture(arch, buff.northwest), + on_architecture(arch, buff.northeast)) + communication_buffers(grid::DistributedGrid, data, boundary_conditions) = CommunicationBuffers(grid, data, boundary_conditions) +""" + CommunicationBuffers(grid, data, boundary_conditions) + +Construct communication buffers for distributed halo exchange. + +`CommunicationBuffers` stores send/receive buffers for each spatial direction and corner +in a distributed grid. During halo exchange, data is copied from the interior domain into +send buffers, communicated via MPI to neighboring processes, and then unpacked from receive +buffers into halo regions. 
+ +# Buffer Types +Edge buffers (`west`, `east`, `south`, `north`) can be: +- `OneDBuffer`: For 1D parallelization or when at domain edges (includes corners) +- `TwoDBuffer`: For 2D parallelization interior processes (excludes corners) +- `nothing`: When no communication is needed in that direction + +Corner buffers (`southwest`, `southeast`, `northwest`, `northeast`) can be: +- `CornerBuffer`: For 2D parallelization where corners need separate communication +- `nothing`: For 1D parallelization or when corners are handled by edge buffers + +# See also +[`OneDBuffer`](@ref), [`TwoDBuffer`](@ref), [`CornerBuffer`](@ref) +""" function CommunicationBuffers(grid, data, boundary_conditions::FieldBoundaryConditions) Hx, Hy, _ = halo_size(grid) arch = architecture(grid) @@ -48,69 +91,150 @@ end CommunicationBuffers(grid, data, ::Missing) = nothing CommunicationBuffers(grid, data, ::Nothing) = nothing -# OneDBuffers are associated with partitioning without corner passing, -# therefore the "corner zones" are communicated within the one-dimensional pass. -const OneDBuffers = CommunicationBuffers{<:Any, <:Any, <:Any, <:Any, <:Nothing, <:Nothing, <:Nothing, <:Nothing} +""" + OneDBuffer{B} -x_communication_buffer(arch, grid, data, H, bc) = nothing -y_communication_buffer(arch, grid, data, H, bc) = nothing +Communication buffer for one-dimensional domain decomposition or edge boundaries. -# Only used for `Distributed` architectures -corner_communication_buffer(arch, grid, data, Hx, Hy, edge1, edge2) = nothing +In a one-dimensional parallelization (e.g., only in x or only in y), `OneDBuffer` +contains the full extent of the halo region including the corners. This allows corner +data to be communicated in a single pass along with the edge data. 
-# Disambiguation -corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, edge2) = nothing -corner_communication_buffer(::Distributed, grid, data, Hx, Hy, edge1, ::Nothing) = nothing -corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, ::Nothing) = nothing +`OneDBuffer` is also used in two-dimensional parallelizations for processes at domain +edges (e.g., boundaries with `RightConnected` or `LeftConnected` topologies), where +corner communication is not needed in that direction. -function corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, edge1, edge2) - return (send = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3))), - recv = on_architecture(arch, zeros(eltype(data), Hx, Hy, size(parent(data), 3)))) +# Size +For x-direction: `(Hx, Ty, Tz)` where `Hx` is the halo size in x, `Ty` includes all y points (with halos) +For y-direction: `(Tx, Hy, Tz)` where `Hy` is the halo size in y, `Tx` includes all x points (with halos) +""" +struct OneDBuffer{B} + send :: B + recv :: B end -function x_communication_buffer(arch::Distributed, grid, data, H, ::DCBC) - # Either we pass corners or it is a 1D parallelization in x - size_y = arch.ranks[2] == 1 ? size(parent(data), 2) : size(grid, 2) - return (send = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3))), - recv = on_architecture(arch, zeros(eltype(data), H, size_y, size(parent(data), 3)))) +""" + TwoDBuffer{B} + +Communication buffer for two-dimensional domain decomposition without corner regions. + +In a two-dimensional parallelization where corners are communicated separately via +`CornerBuffer`, `TwoDBuffer` contains only the edge halo region excluding the corners. +This enables efficient communication patterns where edge and corner data are sent in +separate MPI passes. 
+ +# Size +For x-direction (west/east): `(Hx, Ny, Tz)` where `Hx` is the halo size in x, `Ny` is the interior size in y +For y-direction (south/north): `(Nx, Hy, Tz)` where `Nx` is the interior size in x, `Hy` is the halo size in y +""" +struct TwoDBuffer{B} + send :: B + recv :: B end -function y_communication_buffer(arch::Distributed, grid, data, H, ::DCBC) - # Either we pass corners or it is a 1D parallelization in y - size_x = arch.ranks[1] == 1 ? size(parent(data), 1) : size(grid, 1) - return (send = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3))), - recv = on_architecture(arch, zeros(eltype(data), size_x, H, size(parent(data), 3)))) +""" + CornerBuffer{B} + +Communication buffer for corner regions in two-dimensional domain decomposition. + +In a two-dimensional parallelization, `CornerBuffer` handles the communication of +diagonal corner regions (southwest, southeast, northwest, northeast) that are not +covered by the edge communication buffers (`TwoDBuffer`). Corner communication ensures +that all halo regions are properly synchronized between neighboring processes in both +x and y directions. + +# Size +`(Hx, Hy, Tz)` where `Hx` is the halo size in x, `Hy` is the halo size in y, and `Tz` is the size in z + +# Note +Corner buffers are only created for `Distributed` architectures with two-dimensional +parallelization and are `nothing` otherwise. +""" +struct CornerBuffer{B} + send :: B + recv :: B end -x_communication_buffer(arch, grid, data, H, ::MCBC) = - (send = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3))), - recv = on_architecture(arch, zeros(eltype(data), H, size(parent(data), 2), size(parent(data), 3)))) +# We never need to access buffers on the GPU! 
+Adapt.adapt_structure(to, buff::OneDBuffer) = nothing +Adapt.adapt_structure(to, buff::TwoDBuffer) = nothing +Adapt.adapt_structure(to, buff::CornerBuffer) = nothing -y_communication_buffer(arch, grid, data, H, ::MCBC) = - (send = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3))), - recv = on_architecture(arch, zeros(eltype(data), size(parent(data), 1), H, size(parent(data), 3)))) +#### +#### X and Y communication buffers +#### -Adapt.adapt_structure(to, buff::CommunicationBuffers) = - CommunicationBuffers(Adapt.adapt(to, buff.west), - Adapt.adapt(to, buff.east), - Adapt.adapt(to, buff.north), - Adapt.adapt(to, buff.south), - Adapt.adapt(to, buff.southwest), - Adapt.adapt(to, buff.southeast), - Adapt.adapt(to, buff.northwest), - Adapt.adapt(to, buff.northeast)) +# Fallback +x_communication_buffer(arch, grid, data, H, bc) = nothing +y_communication_buffer(arch, grid, data, H, bc) = nothing -on_architecture(arch, buff::CommunicationBuffers) = - CommunicationBuffers(on_architecture(arch, buff.west), - on_architecture(arch, buff.east), - on_architecture(arch, buff.north), - on_architecture(arch, buff.south), - on_architecture(arch, buff.southwest), - on_architecture(arch, buff.southeast), - on_architecture(arch, buff.northwest), - on_architecture(arch, buff.northeast)) +function x_communication_buffer(arch::Distributed, grid::AbstractGrid{<:Any, TX, TY}, data, H, ::DCBC) where {TX, TY} + _, Ty, Tz = size(parent(data)) + Ny = size(grid, 2) + FT = eltype(data) + if (ranks(arch)[2] == 1) || (TY == RightConnected) || (TY == LeftConnected) + send = on_architecture(arch, zeros(FT, H, Ty, Tz)) + recv = on_architecture(arch, zeros(FT, H, Ty, Tz)) + return OneDBuffer(send, recv) + else + send = on_architecture(arch, zeros(FT, H, Ny, Tz)) + recv = on_architecture(arch, zeros(FT, H, Ny, Tz)) + return TwoDBuffer(send, recv) + end +end -fill_send_buffers!(c::OffsetArray, ::Nothing, grid) = nothing +function 
y_communication_buffer(arch::Distributed, grid::AbstractGrid{<:Any, TX, TY}, data, H, ::DCBC) where {TX, TY} + Tx, _, Tz = size(parent(data)) + FT = eltype(data) + Nx = size(grid, 1) + if (ranks(arch)[1] == 1) || (TX == RightConnected) || (TX == LeftConnected) + send = on_architecture(arch, zeros(FT, Tx, H, Tz)) + recv = on_architecture(arch, zeros(FT, Tx, H, Tz)) + return OneDBuffer(send, recv) + else + send = on_architecture(arch, zeros(FT, Nx, H, Tz)) + recv = on_architecture(arch, zeros(FT, Nx, H, Tz)) + return TwoDBuffer(send, recv) + end +end + +# Never pass corners in a MCBC. +function x_communication_buffer(arch, grid, data, H, ::MCBC) + _, Ty, Tz = size(parent(data)) + FT = eltype(data) + send = on_architecture(arch, zeros(FT, H, Ty, Tz)) + recv = on_architecture(arch, zeros(FT, H, Ty, Tz)) + return OneDBuffer(send, recv) +end + +function y_communication_buffer(arch, grid, data, H, ::MCBC) + Tx, _, Tz = size(parent(data)) + FT = eltype(data) + send = on_architecture(arch, zeros(FT, Tx, H, Tz)) + recv = on_architecture(arch, zeros(FT, Tx, H, Tz)) + return OneDBuffer(send, recv) +end + +##### +##### Corner communication buffers +##### + +# Only used for `Distributed` architectures +corner_communication_buffer(arch, grid, data, Hx, Hy, xedge, yedge) = nothing + +# Disambiguation +corner_communication_buffer(::Distributed, grid, data, Hx, Hy, ::Nothing, ::Nothing) = nothing +corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, ::Nothing, yedge) = nothing +corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, xedge, ::Nothing) = nothing + +# `CornerBuffer`s are used only in the two-dimensional partitioning case; in all other cases they are equal to `nothing` +function corner_communication_buffer(arch::Distributed, grid, data, Hx, Hy, xedge, yedge) + Tz = size(parent(data), 3) + FT = eltype(data) + send = on_architecture(arch, zeros(FT, Hx, Hy, Tz)) + recv = on_architecture(arch, zeros(FT, Hx, Hy, Tz)) + return CornerBuffer(send, recv) 
+end """ fill_send_buffers!(c::OffsetArray, buffers::CommunicationBuffers, grid) @@ -121,29 +245,30 @@ function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid) Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _fill_west_send_buffer!(parent(c), buff, buff.west, Hx, Hy, Nx, Ny) - _fill_east_send_buffer!(parent(c), buff, buff.east, Hx, Hy, Nx, Ny) - _fill_south_send_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny) - _fill_north_send_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny) + _fill_west_send_buffer!(parent(c), buff.west, Hx, Hy, Nx, Ny) + _fill_east_send_buffer!(parent(c), buff.east, Hx, Hy, Nx, Ny) + _fill_south_send_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny) + _fill_north_send_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny) - _fill_southwest_send_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny) - _fill_southeast_send_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny) - _fill_northwest_send_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny) - _fill_northeast_send_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny) + _fill_southwest_send_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny) + _fill_southeast_send_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny) + _fill_northwest_send_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny) + _fill_northeast_send_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny) return nothing end +fill_send_buffers!(c::OffsetArray, ::Nothing, grid) = nothing fill_send_buffers!(c::OffsetArray, ::Nothing, grid, ::Val{:corners}) = nothing function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Val{:corners}) Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _fill_southwest_send_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny) - _fill_southeast_send_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny) - _fill_northwest_send_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny) - _fill_northeast_send_buffer!(parent(c), buff, 
buff.northeast, Hx, Hy, Nx, Ny) + _fill_southwest_send_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny) + _fill_southeast_send_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny) + _fill_northwest_send_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny) + _fill_northeast_send_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny) return nothing end @@ -152,14 +277,14 @@ end ##### Single sided fill_send_buffers! ##### -fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = - _fill_west_send_buffer!(parent(c), buff, buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) -fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = - _fill_east_send_buffer!(parent(c), buff, buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) -fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = - _fill_south_send_buffer!(parent(c), buff, buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) -fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = - _fill_north_send_buffer!(parent(c), buff, buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = + _fill_west_send_buffer!(parent(c), buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = + _fill_east_send_buffer!(parent(c), buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = + _fill_south_send_buffer!(parent(c), buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = + _fill_north_send_buffer!(parent(c), buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) 
fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Bottom) = nothing fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Top) = nothing @@ -171,16 +296,16 @@ function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, :: Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _fill_west_send_buffer!(parent(c), buff, buff.west, Hx, Hy, Nx, Ny) - _fill_east_send_buffer!(parent(c), buff, buff.east, Hx, Hy, Nx, Ny) + _fill_west_send_buffer!(parent(c), buff.west, Hx, Hy, Nx, Ny) + _fill_east_send_buffer!(parent(c), buff.east, Hx, Hy, Nx, Ny) end function fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::SouthAndNorth) Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _fill_south_send_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny) - _fill_north_send_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny) + _fill_south_send_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny) + _fill_north_send_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny) end fill_send_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::BottomAndTop) = nothing @@ -194,15 +319,15 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid) Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _recv_from_west_buffer!(parent(c), buff, buff.west, Hx, Hy, Nx, Ny) - _recv_from_east_buffer!(parent(c), buff, buff.east, Hx, Hy, Nx, Ny) - _recv_from_south_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny) - _recv_from_north_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny) + _recv_from_west_buffer!(parent(c), buff.west, Hx, Hy, Nx, Ny) + _recv_from_east_buffer!(parent(c), buff.east, Hx, Hy, Nx, Ny) + _recv_from_south_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny) + _recv_from_north_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny) - _recv_from_southwest_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny) - _recv_from_southeast_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny) - 
_recv_from_northwest_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny) - _recv_from_northeast_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny) + _recv_from_southwest_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny) + _recv_from_southeast_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny) + _recv_from_northwest_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny) + _recv_from_northeast_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny) return nothing end @@ -211,10 +336,10 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, :: Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _recv_from_southwest_buffer!(parent(c), buff, buff.southwest, Hx, Hy, Nx, Ny) - _recv_from_southeast_buffer!(parent(c), buff, buff.southeast, Hx, Hy, Nx, Ny) - _recv_from_northwest_buffer!(parent(c), buff, buff.northwest, Hx, Hy, Nx, Ny) - _recv_from_northeast_buffer!(parent(c), buff, buff.northeast, Hx, Hy, Nx, Ny) + _recv_from_southwest_buffer!(parent(c), buff.southwest, Hx, Hy, Nx, Ny) + _recv_from_southeast_buffer!(parent(c), buff.southeast, Hx, Hy, Nx, Ny) + _recv_from_northwest_buffer!(parent(c), buff.northwest, Hx, Hy, Nx, Ny) + _recv_from_northeast_buffer!(parent(c), buff.northeast, Hx, Hy, Nx, Ny) return nothing end @@ -223,14 +348,14 @@ end ##### Single sided recv_from_buffers! ##### -recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = - _recv_from_west_buffer!(parent(c), buff, buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) -recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = - _recv_from_east_buffer!(parent(c), buff, buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) -recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = - _recv_from_south_buffer!(parent(c), buff, buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) 
-recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = - _recv_from_north_buffer!(parent(c), buff, buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::West) = + _recv_from_west_buffer!(parent(c), buff.west, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::East) = + _recv_from_east_buffer!(parent(c), buff.east, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::South) = + _recv_from_south_buffer!(parent(c), buff.south, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) +recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::North) = + _recv_from_north_buffer!(parent(c), buff.north, halo_size(grid)[[1, 2]]..., size(grid)[[1, 2]]...) recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Bottom) = nothing recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::Top) = nothing @@ -242,8 +367,8 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, :: Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _recv_from_west_buffer!(parent(c), buff, buff.west, Hx, Hy, Nx, Ny) - _recv_from_east_buffer!(parent(c), buff, buff.east, Hx, Hy, Nx, Ny) + _recv_from_west_buffer!(parent(c), buff.west, Hx, Hy, Nx, Ny) + _recv_from_east_buffer!(parent(c), buff.east, Hx, Hy, Nx, Ny) return nothing end @@ -252,8 +377,8 @@ function recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, :: Hx, Hy, _ = halo_size(grid) Nx, Ny, _ = size(grid) - _recv_from_south_buffer!(parent(c), buff, buff.south, Hx, Hy, Nx, Ny) - _recv_from_north_buffer!(parent(c), buff, buff.north, Hx, Hy, Nx, Ny) + _recv_from_south_buffer!(parent(c), buff.south, Hx, Hy, Nx, Ny) + _recv_from_north_buffer!(parent(c), buff.north, Hx, Hy, Nx, Ny) return nothing end @@ -267,48 +392,44 @@ 
recv_from_buffers!(c::OffsetArray, buff::CommunicationBuffers, grid, ::BottomAnd for dir in (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) _fill_send_buffer! = Symbol(:_fill_, dir, :_send_buffer!) _recv_from_buffer! = Symbol(:_recv_from_, dir, :_buffer!) - - @eval $_fill_send_buffer!(c, b, ::Nothing, args...) = nothing - @eval $_recv_from_buffer!(c, b, ::Nothing, args...) = nothing - @eval $_fill_send_buffer!(c, ::OneDBuffers, ::Nothing, args...) = nothing - @eval $_recv_from_buffer!(c, ::OneDBuffers, ::Nothing, args...) = nothing + @eval $_fill_send_buffer!(c, ::Nothing, args...) = nothing + @eval $_recv_from_buffer!(c, ::Nothing, args...) = nothing end ##### ##### 1D Parallelizations (cover corners with 1 MPI pass) ##### - _fill_west_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, :, :) - _fill_east_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, :, :) -_fill_south_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Hy:2Hy, :) -_fill_north_send_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Ny:Ny+Hy, :) + _fill_west_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, :, :) + _fill_east_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, :, :) +_fill_south_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Hy:2Hy, :) +_fill_north_send_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, :, 1+Ny:Ny+Hy, :) - _recv_from_west_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx, :, :) .= buff.recv - _recv_from_east_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, :, :) .= buff.recv -_recv_from_south_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, :, 1:Hy, :) .= buff.recv -_recv_from_north_buffer!(c, ::OneDBuffers, buff, Hx, Hy, Nx, Ny) = view(c, :, 
1+Ny+Hy:Ny+2Hy, :) .= buff.recv + _recv_from_west_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, 1:Hx, :, :) .= buff.recv + _recv_from_east_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, :, :) .= buff.recv +_recv_from_south_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, :, 1:Hy, :) .= buff.recv +_recv_from_north_buffer!(c, buff::OneDBuffer, Hx, Hy, Nx, Ny) = view(c, :, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv ##### -##### 2D Parallelizations (explicitly send corners) +##### 2D Parallelizations (explicitly send all corners) ##### - _fill_west_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Hy:Ny+Hy, :) - _fill_east_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:Ny+Hy, :) -_fill_south_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Hy:2Hy, :) -_fill_north_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Ny:Ny+Hy, :) - - _recv_from_west_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1+Hy:Ny+Hy, :) .= buff.recv - _recv_from_east_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:Ny+Hy, :) .= buff.recv -_recv_from_south_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx, 1:Hy, :) .= buff.recv -_recv_from_north_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv - -_fill_southwest_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Hy:2Hy, :) -_fill_southeast_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:2Hy, :) -_fill_northwest_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Ny:Ny+Hy, :) -_fill_northeast_send_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Ny:Ny+Hy, :) - -_recv_from_southwest_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1:Hy, :) .= buff.recv -_recv_from_southeast_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 
1:Hy, :) .= buff.recv -_recv_from_northwest_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv -_recv_from_northeast_buffer!(c, b, buff, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv - + _fill_west_send_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Hy:Ny+Hy, :) + _fill_east_send_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:Ny+Hy, :) +_fill_south_send_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Hy:2Hy, :) +_fill_north_send_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:Nx+Hx, 1+Ny:Ny+Hy, :) + + _recv_from_west_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1+Hy:Ny+Hy, :) .= buff.recv + _recv_from_east_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Hy:Ny+Hy, :) .= buff.recv +_recv_from_south_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx, 1:Hy, :) .= buff.recv +_recv_from_north_buffer!(c, buff::TwoDBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Hx:Nx+Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv + +_fill_southwest_send_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Hy:2Hy, :) +_fill_southeast_send_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Hy:2Hy, :) +_fill_northwest_send_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Hx:2Hx, 1+Ny:Ny+Hy, :) +_fill_northeast_send_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = buff.send .= view(c, 1+Nx:Nx+Hx, 1+Ny:Ny+Hy, :) + +_recv_from_southwest_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1:Hy, :) .= buff.recv +_recv_from_southeast_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1:Hy, :) .= buff.recv +_recv_from_northwest_buffer!(c, buff::CornerBuffer, Hx, Hy, Nx, Ny) = view(c, 1:Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv +_recv_from_northeast_buffer!(c, 
buff::CornerBuffer, Hx, Hy, Nx, Ny) = view(c, 1+Nx+Hx:Nx+2Hx, 1+Ny+Hy:Ny+2Hy, :) .= buff.recv diff --git a/src/DistributedComputations/distributed_fields.jl b/src/DistributedComputations/distributed_fields.jl index 2545920f7b..a953eed9ef 100644 --- a/src/DistributedComputations/distributed_fields.jl +++ b/src/DistributedComputations/distributed_fields.jl @@ -62,13 +62,24 @@ function set!(u::DistributedField, V::Field) end end +# Fallback -> not implemented +synchronize_communication!(var) = throw(ArgumentError("`synchronize_communication!` not implemented for variables of type $(typeof(var))")) + +# Methods for types that do not require synchronization +synchronize_communication!(::AbstractField) = nothing +synchronize_communication!(::AbstractArray) = nothing +synchronize_communication!(::Number) = nothing +synchronize_communication!(::Nothing) = nothing + +# Distribute synchronize_communication! over tuples and named tuples +synchronize_communication!(t::Union{NamedTuple, Tuple}) = foreach(synchronize_communication!, t) """ synchronize_communication!(field) complete the halo passing of `field` among processors. 
""" -function synchronize_communication!(field) +function synchronize_communication!(field::DistributedField) arch = architecture(field.grid) # Wait for outstanding requests diff --git a/src/DistributedComputations/distributed_grids.jl b/src/DistributedComputations/distributed_grids.jl index 952dc9a0cb..9b3a82ca91 100644 --- a/src/DistributedComputations/distributed_grids.jl +++ b/src/DistributedComputations/distributed_grids.jl @@ -320,12 +320,14 @@ end function scatter_local_grids(global_grid::RectilinearGrid, arch::Distributed, local_size) x, y, z, topo, halo = scatter_grid_properties(global_grid) global_sz = global_size(arch, local_size) + global_sz = pop_flat_elements(global_sz, topo) return RectilinearGrid(arch, eltype(global_grid); size=global_sz, x=x, y=y, z=z, halo=halo, topology=topo) end function scatter_local_grids(global_grid::LatitudeLongitudeGrid, arch::Distributed, local_size) x, y, z, topo, halo = scatter_grid_properties(global_grid) global_sz = global_size(arch, local_size) + global_sz = pop_flat_elements(global_sz, topo) return LatitudeLongitudeGrid(arch, eltype(global_grid); size=global_sz, longitude=x, latitude=y, z=z, halo=halo, topology=topo, radius=global_grid.radius) end diff --git a/src/DistributedComputations/halo_communication.jl b/src/DistributedComputations/halo_communication.jl index 26bc7f641c..559882da67 100644 --- a/src/DistributedComputations/halo_communication.jl +++ b/src/DistributedComputations/halo_communication.jl @@ -84,8 +84,7 @@ fill_halo_regions!(field::DistributedField, args...; kwargs...) = args...; kwargs...) -function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, grid::DistributedGrid, buffers, args...; - fill_open_bcs=true, kwargs...) +function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, grid::DistributedGrid, args...; kwargs...) 
arch = architecture(grid) kernels!, bcs = get_boundary_kernels(boundary_conditions, c, grid, loc, indices) @@ -94,10 +93,10 @@ function fill_halo_regions!(c::OffsetArray, boundary_conditions, indices, loc, g outstanding_requests = length(arch.mpi_requests) for task = 1:number_of_tasks - fill_halo_event!(c, kernels![task], bcs[task], loc, grid, buffers, args...; kwargs...) + fill_halo_event!(c, kernels![task], bcs[task], loc, grid, args...; kwargs...) end - fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) + fill_corners!(c, arch.connectivity, indices, loc, arch, grid, args...; kwargs...) # We increment the request counter only if we have actually initiated the MPI communication. # This is the case only if at least one of the boundary conditions is a distributed communication diff --git a/test/runtests.jl b/test/runtests.jl index 6c7c2dd450..b498ad0080 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -182,12 +182,19 @@ CUDA.allowscalar() do include("test_distributed_poisson_solvers.jl") end - if group == :distributed_hydrostatic_model || group == :all + if group == :distributed_hydrostatic_regression || group == :all MPI.Initialized() || MPI.Init() # In case CUDA is not found, we reset CUDA and restart the julia session reset_cuda_if_necessary() archs = test_architectures() include("test_hydrostatic_regression.jl") + end + + if group == :distributed_hydrostatic_model || group == :all + MPI.Initialized() || MPI.Init() + # In case CUDA is not found, we reset CUDA and restart the julia session + reset_cuda_if_necessary() + archs = test_architectures() include("test_distributed_hydrostatic_model.jl") end @@ -232,7 +239,6 @@ CUDA.allowscalar() do end end - # Tests for Enzyme extension if group == :enzyme || group == :all @testset "Enzyme extension tests" begin