diff --git a/include/DirectSolver/DirectSolverTake/applySymmetryShift.inl b/include/DirectSolver/DirectSolverTake/applySymmetryShift.inl index b44779a8..fc2ccf4d 100644 --- a/include/DirectSolver/DirectSolverTake/applySymmetryShift.inl +++ b/include/DirectSolver/DirectSolverTake/applySymmetryShift.inl @@ -5,7 +5,7 @@ /* ----------------------- */ template -void DirectSolverTake::applySymmetryShiftInnerBoundary(Vector x) const +void DirectSolverTake::applySymmetryShiftInnerBoundary(Vector& x) const { const PolarGrid& grid = DirectSolver::grid_; const LevelCacheType& level_cache = DirectSolver::level_cache_; @@ -19,34 +19,38 @@ void DirectSolverTake::applySymmetryShiftInnerBoundary(Vector att = level_cache.att(); ConstVector art = level_cache.art(); - const int i_r = 1; + const int i_r = 1; + const int ntheta = grid.ntheta(); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - double h1 = grid.radialSpacing(i_r - 1); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + Kokkos::parallel_for( + "applySymmetryShiftInnerBoundary", Kokkos::RangePolicy(0, ntheta), + KOKKOS_LAMBDA(const int i_theta) { + const double h1 = grid.radialSpacing(i_r - 1); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff1 = 0.5 * (k1 + k2) / h1; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int bottom_left = grid.index(i_r - 1, i_theta_M1); - const int left = grid.index(i_r - 1, i_theta); - const int top_left = grid.index(i_r - 1, i_theta_P1); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); + const int bottom_left = grid.index(i_r - 1, i_theta_M1); + const int left = grid.index(i_r - 1, i_theta); + const int top_left = grid.index(i_r - 1, i_theta_P1); - x[center] -= (-coeff1 * (arr[center] + arr[left]) * x[left] /* Left */ - - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ - + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ - ); - } + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + + x[center] -= (-coeff1 * (arr[center] + arr[left]) * x[left] /* Left */ + - 0.25 * (art[left] + art[bottom]) * x[bottom_left] /* Bottom Left */ + + 0.25 * (art[left] + art[top]) * x[top_left] /* Top Left */ + ); + }); } template -void DirectSolverTake::applySymmetryShiftOuterBoundary(Vector x) const +void DirectSolverTake::applySymmetryShiftOuterBoundary(Vector& x) const { const PolarGrid& grid = DirectSolver::grid_; const LevelCacheType& level_cache = DirectSolver::level_cache_; @@ -58,34 +62,38 @@ void DirectSolverTake::applySymmetryShiftOuterBoundary(Vector att = level_cache.att(); ConstVector art = level_cache.art(); - const int i_r = grid.nr() - 2; + const int i_r = grid.nr() - 2; + const int ntheta = grid.ntheta(); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + Kokkos::parallel_for( + "applySymmetryShiftOuterBoundary", Kokkos::RangePolicy(0, ntheta), + KOKKOS_LAMBDA(const int i_theta) { + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff2 = 0.5 * (k1 + k2) / h2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int bottom_right = grid.index(i_r + 1, i_theta_M1); - const int right = grid.index(i_r + 1, i_theta); - const int top_right = grid.index(i_r + 1, i_theta_P1); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); - x[center] -= (-coeff2 * (arr[center] + arr[right]) * x[right] /* Right */ - + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ - - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ - ); - } + const int bottom_right = grid.index(i_r + 1, i_theta_M1); + const int right = grid.index(i_r + 1, i_theta); + const int top_right = grid.index(i_r + 1, i_theta_P1); + + x[center] -= (-coeff2 * (arr[center] + arr[right]) * x[right] /* Right */ + + 0.25 * (art[right] + art[bottom]) * x[bottom_right] /* Bottom Right */ + - 0.25 * (art[right] + art[top]) * x[top_right] /* Top Right */ + ); + }); } template -void DirectSolverTake::applySymmetryShift(Vector x) const +void DirectSolverTake::applySymmetryShift(Vector& x) const { const PolarGrid& grid = DirectSolver::grid_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; @@ -98,4 +106,6 @@ void DirectSolverTake::applySymmetryShift(Vector x) cons } applySymmetryShiftOuterBoundary(x); + + Kokkos::fence(); } diff --git a/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl b/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl index a144a9c6..6e86e089 100644 --- a/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_take #ifdef GMGPOLAR_USE_MUMPS // When using the MUMPS solver, the matrix is assembled in COO format. -static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static KOKKOS_INLINE_FUNCTION void updateMatrixElement(const SparseMatrixCOO& matrix, + int ptr, int offset, int row, int column, double value) { matrix.set_row_index(ptr + offset, row); matrix.set_col_index(ptr + offset, column); @@ -14,8 +14,8 @@ static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, - int column, double value) +static KOKKOS_INLINE_FUNCTION void updateMatrixElement(const SparseMatrixCSR& matrix, + int ptr, int offset, int row, int column, double value) { matrix.set_row_nz_index(row, offset, column); matrix.set_row_nz_entry(row, offset, value); @@ -26,10 +26,10 @@ static inline void updateMatrixElement(SparseMatrixCSR void DirectSolverTake::nodeBuildSolverMatrixTake(int i_r, int i_theta, const PolarGrid& grid, - bool DirBC_Interior, SystemMatrix& solver_matrix, + bool DirBC_Interior, const SystemMatrix& solver_matrix, ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta) + ConstVector& coeff_beta) const { using direct_solver_coo_mumps_take::updateMatrixElement; int ptr, offset; @@ -473,7 +473,6 @@ typename DirectSolverTake::SystemMatrix DirectSolverTake::grid_; const LevelCacheType& level_cache = DirectSolver::level_cache_; - const int num_omp_threads = DirectSolver::num_omp_threads_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; assert(validateSolverMatrixIndexing() && "Solver matrix indexing is inconsistent"); @@ -501,25 +500,36 @@ typename DirectSolverTake::SystemMatrix DirectSolverTake detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); -#pragma omp parallel num_threads(num_omp_threads) - { -/* Circle Section */ -#pragma omp for nowait - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, - coeff_beta); - } - } -/* Radial Section */ -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, - coeff_beta); - } - } - } + /* We split the loops into two regions to better respect the */ + /* access patterns of the smoother and improve cache locality. */ + + // The For loop matches circular access pattern */ + Kokkos::parallel_for( + "DirectSolverTake: BuildSolverMatrix (Circular)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, 0}, // Starting point of the index space + {grid.numberSmootherCircles(), grid.ntheta()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int i_r, const int i_theta) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, + coeff_beta); + }); + + /* For loop matches radial access pattern */ + Kokkos::parallel_for( + "DirectSolverTake: BuildSolverMatrix (Radial)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, grid.numberSmootherCircles()}, // Starting point of the index space + {grid.ntheta(), grid.nr()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_CLASS_LAMBDA(const int i_theta, const int i_r) { + nodeBuildSolverMatrixTake(i_r, i_theta, grid, DirBC_Interior, solver_matrix, arr, att, art, detDF, + coeff_beta); + }); + + Kokkos::fence(); return solver_matrix; } diff --git a/include/DirectSolver/DirectSolverTake/directSolverTake.h b/include/DirectSolver/DirectSolverTake/directSolverTake.h index 2c8a14e0..5b6dda62 100644 --- a/include/DirectSolver/DirectSolverTake/directSolverTake.h +++ b/include/DirectSolver/DirectSolverTake/directSolverTake.h @@ -60,6 +60,7 @@ class DirectSolverTake : public DirectSolver // Solver object (owns matrix if MUMPS, references if in-house solver). SystemSolver system_solver_; +public: // Constructs a symmetric solver matrix. SystemMatrix buildSolverMatrix(); @@ -70,9 +71,9 @@ class DirectSolverTake : public DirectSolver // symmetric_DBc(A) * solution = rhs - applySymmetryShift(rhs). // The correction modifies the rhs to account for the influence of the Dirichlet boundary conditions, // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. - void applySymmetryShift(Vector rhs) const; - void applySymmetryShiftInnerBoundary(Vector x) const; - void applySymmetryShiftOuterBoundary(Vector x) const; + void applySymmetryShift(Vector& rhs) const; + void applySymmetryShiftInnerBoundary(Vector& x) const; + void applySymmetryShiftOuterBoundary(Vector& x) const; // Verify that solver matrix indexing is valid. bool validateSolverMatrixIndexing() const; @@ -89,9 +90,9 @@ class DirectSolverTake : public DirectSolver int getStencilSize(int global_index) const; void nodeBuildSolverMatrixTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SystemMatrix& solver_matrix, ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + const SystemMatrix& solver_matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta) const; }; #include "applySymmetryShift.inl"