diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 3ea507c2..e09af360 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -23,6 +23,20 @@ class BatchedTridiagonalSolver assign(sub_diagonal_, T(0)); } + /* ------------------- */ + /* Accessors for sizes */ + /* ------------------- */ + + int matrixDimension() const + { + return matrix_dimension_; + } + + int batchCount() const + { + return batch_count_; + } + /* ---------------------------- */ /* Accessors for matrix entries */ /* ---------------------------- */ @@ -288,4 +302,4 @@ class BatchedTridiagonalSolver bool is_cyclic_; bool is_factorized_; -}; +}; \ No newline at end of file diff --git a/include/Smoother/SmootherGive/smootherGive.h b/include/Smoother/SmootherGive/smootherGive.h index 4aadd265..0d656c6d 100644 --- a/include/Smoother/SmootherGive/smootherGive.h +++ b/include/Smoother/SmootherGive/smootherGive.h @@ -2,26 +2,83 @@ #include "../smoother.h" +// The smoother implements the coupled circle-radial smoothing procedure. +// It performs iterative updates on different parts of the grid based +// on the circle/radial section of the grid and black/white line coloring. +// +// The smoother solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. +// - Using 'temp' isn't strictly necessary as all updates could be performed in place in 'x'. +// - The stencil is applied using the A-Give method. +// +// Solver and matrix structure: +// - The matrix A_sc is block tridiagonal due to the smoother-based +// grid indexing, which allows efficient line-wise factorization. +// - The inner boundary requires special handling because it +// contains an additional across-origin coupling, making it +// non-tridiagonal; therefore, a more general solver is used there. +// When using the MUMPS solver, the matrix is assembled in COO format. +// When using the in-house solver, the matrix is stored in CSR format. +// - Circular line matrices are cyclic tridiagonal due to angular +// periodicity, whereas radial line matrices are strictly tridiagonal. +// - Dirichlet boundary contributions in radial matrices are shifted +// into the right-hand side to make A_sc symmetric. + class SmootherGive : public Smoother { public: + // Constructs the coupled circle-radial smoother. + // Builds the A_sc smoother matrices and prepares the solvers. explicit SmootherGive(const PolarGrid& grid, const LevelCache& level_cache, const DomainGeometry& domain_geometry, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); + + // If MUMPS is enabled, this cleans up the inner boundary solver. ~SmootherGive() override; + // Performs one full coupled smoothing sweep: + // BC -> WC -> BR -> WR + // Parallel implementation using OpenMP: + // Scedule every 2nd/4th line in parallel to avoid race conditions arising from the A-Give distribution. + // Sceduling every 3rd line in parallel would also be possible, but is less natural for the 2 coloring. void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - void smoothingSequential(Vector x, ConstVector rhs, Vector temp); - void smoothingForLoop(Vector x, ConstVector rhs, Vector temp); - - // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix - // 'inner_boundary_circle_matrix_' due to the across-origin treatment. - // It isn't tridiagonal and thus it requires a more advanced solver. - // Note that circle_tridiagonal_solver_[0] is thus unused! - // Additionally 'circle_tridiagonal_solver_[index]' will refer to the circular line i_r = index and - // 'radial_tridiagonal_solver_[index] will refer to the radial line i_theta = index. + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial circle line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; DMUMPS_STRUC_C inner_boundary_mumps_solver_; @@ -29,10 +86,25 @@ class SmootherGive : public Smoother using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; #endif + // Sparse matrix for the non-tridiagonal inner boundary circle block. MatrixType inner_boundary_circle_matrix_; - std::vector> circle_tridiagonal_solver_; - std::vector> radial_tridiagonal_solver_; + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------- */ + /* Stencil definitions */ + /* ------------------- */ + + // Stencils encode neighborhood connectivity for A_sc matrix assembly. + // It is only used in the construction of COO/CSR matrices. + // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. + // The Stencil class stores the offset for each position. + // - Non-zero matrix indicesare obtained via `ptr + offset` + // - A offset value of `-1` means the position is not included in the stencil pattern. + // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices. // clang-format off const Stencil stencil_DB_ = { @@ -40,63 +112,74 @@ class SmootherGive : public Smoother -1, 0, -1, -1, -1, -1 }; - /* Circle Stencils */ - const Stencil circle_stencil_interior_ = { - -1, 2, -1, - -1, 0, -1, - -1, 1, -1 - }; const Stencil circle_stencil_across_origin_ = { -1, 3, -1, 1, 0, -1, -1, 2, -1 }; - /* Radial Stencils */ - const Stencil radial_stencil_interior_ = { - -1, -1, -1, - 1, 0, 2, - -1, -1, -1 - }; - const Stencil radial_stencil_next_outer_DB_ = { - -1, -1, -1, - 1, 0, -1, - -1, -1, -1 - }; - const Stencil radial_stencil_next_circular_smoothing_ = { - -1, -1, -1, - -1, 0, 1, - -1, -1, -1 - }; + // clang-format on - const Stencil& getStencil(int i_r) const; - int getNonZeroCountCircleAsc(const int i_r) const; - int getNonZeroCountRadialAsc(const int i_theta) const; + // Select correct stencil depending on the grid position. + const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ + // Number of nonzero A_sc entries. + int getNonZeroCountCircleAsc(int i_r) const; /* Only i_r = 0 implemented */ + // Obtain a ptr to index into COO matrices. + // It accumulates all stencil sizes within a line up to, but excluding the current node. + int getCircleAscIndex(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ - int getCircleAscIndex(const int i_r, const int i_theta) const; - int getRadialAscIndex(const int i_r, const int i_theta) const; + /* --------------- */ + /* Matrix assembly */ + /* --------------- */ + // Build all A_sc matrices for circle and radial smoothers. void buildAscMatrices(); - void buildAscCircleSection(const int i_r); - void buildAscRadialSection(const int i_theta); + // Build A_sc matrix block for a single circular line. + void buildAscCircleSection(int i_r); + // Build A_sc matrix block for a single radial line. + void buildAscRadialSection(int i_theta); + // Build A_sc for a specific node (i_r, i_theta) + void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& inner_boundary_circle_matrix, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, + double art, double detDF, double coeff_beta); + + /* ---------------------- */ + /* Orthogonal application */ + /* ---------------------- */ - void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, + // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) + // where x = u_sc and rhs = f_sc + void applyAscOrthoCircleSection(int i_r, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, + void applyAscOrthoRadialSection(int i_theta, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void solveCircleSection(const int i_r, Vector x, Vector temp, Vector solver_storage_1, - Vector solver_storage_2); - void solveRadialSection(const int i_theta, Vector x, Vector temp, Vector solver_storage); + /* ----------------- */ + /* Line-wise solvers */ + /* ----------------- */ + // Solve the linear system: + // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho + // Parameter mapping: + // x = u_sc (solution vector for section s and color c) + // temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) + // where: + // s in {Circle, Radial} denotes the smoother section type, + // c in {Black, White} denotes the line coloring. + void solveBlackCircleSection(Vector x, Vector temp); + void solveWhiteCircleSection(Vector x, Vector temp); + void solveBlackRadialSection(Vector x, Vector temp); + void solveWhiteRadialSection(Vector x, Vector temp); + + /* ----------------------------------- */ + /* Initialize and destroy MUMPS solver */ + /* ----------------------------------- */ #ifdef GMGPOLAR_USE_MUMPS + // Initialize sparse MUMPS solver with assembled COO matrix. void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); + // Release MUMPS internal memory and MPI structures. void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); #endif - - void nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, double arr, - double att, double art, double detDF, double coeff_beta); }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 60e5fbd3..ae7e8d57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -117,11 +117,12 @@ set(SMOOTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/smoother.cpp # SmootherGive + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/buildMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/matrixStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherGive.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherSolver.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/solveAscSystem.cpp # SmootherTake ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildMatrix.cpp diff --git a/src/Smoother/SmootherGive/smootherSolver.cpp b/src/Smoother/SmootherGive/applyAscOrtho.cpp similarity index 64% rename from src/Smoother/SmootherGive/smootherSolver.cpp rename to src/Smoother/SmootherGive/applyAscOrtho.cpp index f46321a2..d4ef5f1b 100644 --- a/src/Smoother/SmootherGive/smootherSolver.cpp +++ b/src/Smoother/SmootherGive/applyAscOrtho.cpp @@ -410,7 +410,7 @@ static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const Polar // "Right" is part of the radial Asc smoother matrices, // but is shifted over to the rhs to make the radial Asc smoother matrices symmetric. // Note that the circle Asc smoother matrices are symmetric by default. - // Note that rhs[right] contains the correct boundary value of u_D. + // Note that rhs[center] contains the correct boundary value of u_D. result[left] -= (-coeff1 * arr * rhs[center] /* Right */ ); } @@ -447,6 +447,7 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, const SmootherC { const double theta = grid_.theta(i_theta); + /* We need to obtain left contributions from the circular section for AscOrtho. */ /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { const double r = grid_.radius(i_r); @@ -459,253 +460,4 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, const SmootherC nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, detDF, coeff_beta); } -} - -void SmootherGive::solveCircleSection(const int i_r, Vector x, Vector temp, - Vector solver_storage_1, Vector solver_storage_2) -{ - const int start = grid_.index(i_r, 0); - const int end = start + grid_.ntheta(); - if (i_r == 0) { -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = temp.data() + start; - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } -#else - inner_boundary_lu_solver_.solveInPlace(temp.data() + start); -#endif - } - else { - circle_tridiagonal_solver_[i_r].solveInPlace(temp.data() + start, solver_storage_1.data(), - solver_storage_2.data()); - } - // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); -} - -void SmootherGive::solveRadialSection(const int i_theta, Vector x, Vector temp, - Vector solver_storage) -{ - const int start = grid_.index(grid_.numberSmootherCircles(), i_theta); - const int end = start + grid_.lengthSmootherRadial(); - - radial_tridiagonal_solver_[i_theta].solveInPlace(temp.data() + start, solver_storage.data()); - // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); -} - -/* ------------------ */ -/* Sequential Version */ -/* ------------------ */ - -void SmootherGive::smoothingSequential(Vector x, ConstVector rhs, Vector temp) -{ - assert(x.size() == rhs.size()); - assert(temp.size() == rhs.size()); - - Kokkos::deep_copy(temp, rhs); - - /* Single-threaded execution */ - Vector circle_solver_storage_1("circle_solver_storage_1", grid_.ntheta()); - Vector circle_solver_storage_2("circle_solver_storage_2", grid_.ntheta()); - Vector radial_solver_storage("radial_solver_storage", grid_.lengthSmootherRadial()); - - /* ---------------------------- */ - /* ------ CIRCLE SECTION ------ */ - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - for (int i_r = 0; i_r < grid_.numberSmootherCircles() + 1; i_r++) { - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - /* ---------------------------- */ - /* ------ RADIAL SECTION ------ */ - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } -} - -/* ------------------------------------ */ -/* Parallelization Version 1: For Loops */ -/* ------------------------------------ */ -// clang-format off -void SmootherGive::smoothingForLoop(Vector x, ConstVector rhs, Vector temp) -{ - assert(x.size() == rhs.size()); - assert(temp.size() == rhs.size()); - - if (num_omp_threads_ == 1) { - smoothingSequential(x, rhs, temp); - } - else { - Kokkos::deep_copy(temp,rhs); - - /* Multi-threaded execution */ - const int num_circle_tasks = grid_.numberSmootherCircles(); - const int num_radial_tasks = grid_.ntheta(); - - #pragma omp parallel num_threads(num_omp_threads_) - { - Vector circle_solver_storage_1("circle_solver_storage_1",grid_.ntheta()); - Vector circle_solver_storage_2("circle_solver_storage_2",grid_.ntheta()); - Vector radial_solver_storage("radial_solver_storage",grid_.lengthSmootherRadial()); - - /* ---------------------------- */ - /* ------ CIRCLE SECTION ------ */ - /* ---------------------------- */ - - /* ---------------------------- */ - /* Asc ortho Black Circle Tasks */ - - /* Inside Black Section */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - - /* Outside Black Section (Part 1)*/ - #pragma omp for - for (int circle_task = -1; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - - /* Outside Black Section (Part 2)*/ - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - - /* Black Circle Smoother */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Inside White Section */ - #pragma omp for nowait - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Inside Black Section */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Outside White Section (Part 1)*/ - #pragma omp for nowait - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Outside Black Section (Part 1) */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Outside White Section (Part 2)*/ - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Outside Black Section (Part 2) */ - #pragma omp for - for (int radial_task = 3; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* White Circle Smoother */ - #pragma omp for nowait - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - /* Black Radial Smoother */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - - /* Inside White Section */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 1) */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 2) */ - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - - /* White Radial Smoother */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - } - } -} -// clang-format on +} \ No newline at end of file diff --git a/src/Smoother/SmootherGive/buildMatrix.cpp b/src/Smoother/SmootherGive/buildMatrix.cpp index d4dcf733..d563beaf 100644 --- a/src/Smoother/SmootherGive/buildMatrix.cpp +++ b/src/Smoother/SmootherGive/buildMatrix.cpp @@ -3,14 +3,15 @@ #include "../../../include/Definitions/geometry_helper.h" /* Tridiagonal matrices */ -static inline void updateMatrixElement(SymmetricTridiagonalSolver& matrix, int row, int column, double value) +static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, + double value) { if (row == column) - matrix.main_diagonal(row) += value; + solver.main_diagonal(batch, row) += value; else if (row == column - 1) - matrix.sub_diagonal(row) += value; - else if (row == 0 && column == matrix.columns() - 1) - matrix.cyclic_corner_element() += value; + solver.sub_diagonal(batch, row) += value; + else if (row == 0 && column == solver.matrixDimension() - 1) + solver.cyclic_corner(batch) += value; } /* Inner Boundary COO/CSR matrix */ @@ -31,11 +32,11 @@ static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, in } #endif -void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, - double arr, double att, double art, double detDF, double coeff_beta) +void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& inner_boundary_circle_matrix, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, + double art, double detDF, double coeff_beta) { assert(i_r >= 0 && i_r < grid.nr()); assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -87,53 +88,55 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* | o | o | O || o o o o <- right_matrix */ /* | | | || -------------- */ /* | o | o | o || o o o o */ - auto& left_matrix = circle_tridiagonal_solver[i_r - 1]; - auto& center_matrix = circle_tridiagonal_solver[i_r]; - auto& right_matrix = (i_r + 1 == numberSmootherCircles) ? radial_tridiagonal_solver[i_theta] - : circle_tridiagonal_solver[i_r + 1]; + auto& left_solver = circle_tridiagonal_solver; + int left_batch = i_r - 1; + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& right_solver = (i_r + 1 == numberSmootherCircles) ? radial_tridiagonal_solver : circle_tridiagonal_solver; + int right_batch = (i_r + 1 == numberSmootherCircles) ? i_theta : i_r + 1; /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = bottom_index; value = -coeff3 * att; /* Bottom */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = top_index; value = -coeff4 * att; /* Top */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = center_index; value = -coeff3 * att; /* Top */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = center_index; value = -coeff4 * att; /* Bottom */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ if (!DirBC_Interior && i_r == 1) { @@ -144,21 +147,21 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& offset = LeftStencil[StencilPosition::Center]; col = left_index; - val = +coeff1 * arr; /* Center: (Right) */ + val = coeff1 * arr; /* Center: (Right) */ updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); } if (i_r > 1) { row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateMatrixElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); } /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } /* ------------------------------------------ */ /* Node in the interior of the Radial Section */ @@ -184,9 +187,12 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* ---------- */ /* o o o <- bottom_matrix */ /* ---------- */ - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1]; - auto& center_matrix = radial_tridiagonal_solver[i_theta]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1]; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; @@ -197,57 +203,57 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateMatrixElement(center_matrix, row, column, value); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = left_index; value = -coeff1 * arr; /* Left */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = right_index; value = -coeff2 * arr; /* Right */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = center_index; value = -coeff1 * arr; /* Right */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = center_index; value = -coeff2 * arr; /* Left */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateMatrixElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateMatrixElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } /* ------------------------------------------ */ /* Circle Section: Node in the inner boundary */ @@ -267,8 +273,8 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - auto& center_matrix = inner_boundary_circle_matrix; - auto& right_matrix = circle_tridiagonal_solver[i_r + 1]; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; const int center_index = i_theta; const int right_index = i_theta; @@ -290,7 +296,7 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } else { /* ------------------------------------------------------------- */ @@ -315,9 +321,8 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* -| O | o | o | */ /* -| | | | */ /* -| x | o | x | */ - auto& center_matrix = inner_boundary_circle_matrix; - auto& right_matrix = circle_tridiagonal_solver[i_r + 1]; - auto& left_matrix = inner_boundary_circle_matrix; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -390,7 +395,7 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; @@ -451,10 +456,14 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* | o | o | o || O o o o <- center_matrix */ /* | | | || -------------- */ /* | o | o | o || o o o o <- bottom_matrix */ - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1]; - auto& center_matrix = radial_tridiagonal_solver[i_theta]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1]; - auto& left_matrix = circle_tridiagonal_solver[i_r - 1]; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; + auto& left_solver = circle_tridiagonal_solver; + int left_batch = i_r - 1; const int center_index = i_r - numberSmootherCircles; const int left_index = i_theta; @@ -466,46 +475,46 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = right_index; value = -coeff2 * arr; /* Right */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateMatrixElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = center_index; value = -coeff2 * arr; /* Left */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateMatrixElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateMatrixElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } /* ------------------------------------------- */ /* Radial Section: Node next to outer boundary */ @@ -531,9 +540,12 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* ---------------|| */ /* o o o o || <- bottom_matrix */ /* ---------------|| */ - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1]; - auto& center_matrix = radial_tridiagonal_solver[i_theta]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1]; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; @@ -546,40 +558,40 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = left_index; value = -coeff1 * arr; /* Left */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = center_index; value = -coeff1 * arr; /* Right */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateMatrixElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateMatrixElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } /* ------------------------------------------ */ /* Radial Section: Node on the outer boundary */ @@ -597,7 +609,8 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& /* -----------|| */ /* o o o || */ /* -----------|| */ - auto& center_matrix = radial_tridiagonal_solver[i_theta]; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; @@ -606,13 +619,13 @@ void SmootherGive::nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& row = center_index; column = center_index; value = 1.0; - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateMatrixElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } } @@ -627,8 +640,8 @@ void SmootherGive::buildAscCircleSection(const int i_r) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildSmootherGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, + circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } @@ -643,177 +656,107 @@ void SmootherGive::buildAscRadialSection(const int i_theta) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildSmootherGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, + circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } -// clang-format off void SmootherGive::buildAscMatrices() { /* -------------------------------------- */ /* Part 1: Allocate Asc Smoother matrices */ /* -------------------------------------- */ + // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. + // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - const int number_smoother_circles = grid_.numberSmootherCircles(); - const int length_smoother_radial = grid_.lengthSmootherRadial(); - - const int num_circle_nodes = grid_.ntheta(); - circle_tridiagonal_solver_.resize(number_smoother_circles); - - const int num_radial_nodes = length_smoother_radial; - radial_tridiagonal_solver_.resize(grid_.ntheta()); - - // Remark: circle_tridiagonal_solver_[0] is unitialized. - // Please use inner_boundary_circle_matrix_ instead! - #pragma omp parallel num_threads(num_omp_threads_) if (grid_.numberOfNodes() > 10'000) - { - // ---------------- // - // Circular Section // - #pragma omp for nowait - for (int circle_Asc_index = 0; circle_Asc_index < number_smoother_circles; circle_Asc_index++) { - - /* Inner boundary circle */ - if (circle_Asc_index == 0) { - #ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int nnz = getNonZeroCountCircleAsc(circle_Asc_index); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, nnz); - inner_boundary_circle_matrix_.is_symmetric(false); - for (int i = 0; i < nnz; i++) { - inner_boundary_circle_matrix_.value(i) = 0.0; - } - #else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior_? 1 : 4; - }; - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); - for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) { - inner_boundary_circle_matrix_.values_data()[i] = 0.0; - } - #endif - } - - /* Interior Circle Section */ - else { - auto& solverMatrix = circle_tridiagonal_solver_[circle_Asc_index]; - - solverMatrix = SymmetricTridiagonalSolver(num_circle_nodes); - solverMatrix.is_cyclic(true); - solverMatrix.cyclic_corner_element() = 0.0; - - for (int i = 0; i < num_circle_nodes; i++) { - solverMatrix.main_diagonal(i) = 0.0; - if (i < num_circle_nodes - 1) { - solverMatrix.sub_diagonal(i) = 0.0; - } - } - } - } - - // -------------- // - // Radial Section // - #pragma omp for nowait - for (int radial_Asc_index = 0; radial_Asc_index < grid_.ntheta(); radial_Asc_index++) { - auto& solverMatrix = radial_tridiagonal_solver_[radial_Asc_index]; - - solverMatrix = SymmetricTridiagonalSolver(num_radial_nodes); - solverMatrix.is_cyclic(false); - - for (int i = 0; i < num_radial_nodes; i++) { - solverMatrix.main_diagonal(i) = 0.0; - if (i < num_radial_nodes - 1) { - solverMatrix.sub_diagonal(i) = 0.0; - } - } - } - } +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int inner_i_r = 0; + const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); + inner_boundary_circle_matrix_.is_symmetric(false); +#else + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior_ ? 1 : 4; + }; + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); +#endif /* ---------------------------------- */ /* Part 2: Fill Asc Smoother matrices */ /* ---------------------------------- */ - if (num_omp_threads_ == 1) { - /* Single-threaded execution */ - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + /* Multi-threaded execution: */ + const int num_smoother_circles = grid_.numberSmootherCircles(); + const int additional_radial_tasks = grid_.ntheta() % 3; + const int num_radial_tasks = grid_.ntheta() - additional_radial_tasks; + +#pragma omp parallel num_threads(num_omp_threads_) + { +#pragma omp for + for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { buildAscCircleSection(i_r); } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); +#pragma omp for + for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { + buildAscCircleSection(i_r); + } +#pragma omp for + for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { + buildAscCircleSection(i_r); } - } - else { - /* Multi-threaded execution: For Loops */ - const int num_circle_tasks = grid_.numberSmootherCircles(); - const int additional_radial_tasks = grid_.ntheta() % 3; - const int num_radial_tasks = grid_.ntheta() - additional_radial_tasks; - - #pragma omp parallel num_threads(num_omp_threads_) - { - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(0); - } - else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); - } - } +#pragma omp for + for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { + if (radial_task > 0) { + int i_theta = radial_task + additional_radial_tasks; + buildAscRadialSection(i_theta); } - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + else { + if (additional_radial_tasks == 0) { + buildAscRadialSection(0); } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(1); - } - else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); - } - else if (additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); - } + else if (additional_radial_tasks >= 1) { + buildAscRadialSection(0); + buildAscRadialSection(1); } } - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + } +#pragma omp for + for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { + if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; buildAscRadialSection(i_theta); } + else { + if (additional_radial_tasks == 0) { + buildAscRadialSection(1); + } + else if (additional_radial_tasks == 1) { + buildAscRadialSection(2); + } + else if (additional_radial_tasks == 2) { + buildAscRadialSection(2); + buildAscRadialSection(3); + } + } + } +#pragma omp for + for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + int i_theta = radial_task + additional_radial_tasks; + buildAscRadialSection(i_theta); } } - #ifdef GMGPOLAR_USE_MUMPS + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + +#ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------ */ /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); const int nnz = full_matrix.non_zero_size(); @@ -835,6 +778,5 @@ void SmootherGive::buildAscMatrices() current_nz++; } } - #endif +#endif } -// clang-format on diff --git a/src/Smoother/SmootherGive/matrixStencil.cpp b/src/Smoother/SmootherGive/matrixStencil.cpp index 9fb62fea..ccda15a5 100644 --- a/src/Smoother/SmootherGive/matrixStencil.cpp +++ b/src/Smoother/SmootherGive/matrixStencil.cpp @@ -1,118 +1,35 @@ #include "../../../include/Smoother/SmootherGive/smootherGive.h" +#include "../../../include/Smoother/SmootherTake/smootherTake.h" + const Stencil& SmootherGive::getStencil(int i_r) const { - - assert(0 <= i_r && i_r < grid_.nr()); - - assert(grid_.numberSmootherCircles() >= 2); - assert(grid_.lengthSmootherRadial() >= 3); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - if (i_r < numberSmootherCircles) { - /* Circle Section */ - if (i_r > 0 && i_r < numberSmootherCircles) { - return circle_stencil_interior_; - } - else if (i_r == 0) { - return DirBC_Interior_ ? stencil_DB_ : circle_stencil_across_origin_; - } + if (i_r == 0) { + return DirBC_Interior_ ? stencil_DB_ : circle_stencil_across_origin_; } else { - /* Radial Section */ - if (i_r > numberSmootherCircles && i_r < grid_.nr() - 2) { - return radial_stencil_interior_; - } - else if (i_r == numberSmootherCircles) { - return radial_stencil_next_circular_smoothing_; - } - else if (i_r == grid_.nr() - 1) { - return stencil_DB_; - } - else if (i_r == grid_.nr() - 2) { - return radial_stencil_next_outer_DB_; - } + throw std::out_of_range("getStencil: Only i_r = 0 implemented."); } - throw std::out_of_range("Invalid index for stencil"); } -int SmootherGive::getNonZeroCountCircleAsc(const int i_r) const +int SmootherGive::getNonZeroCountCircleAsc(int i_r) const { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; - const int size_stencil_interior = 3; - - if (i_r > 0) { - return size_stencil_interior * grid_.ntheta(); - } - else if (i_r == 0) { + if (i_r == 0) { + const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; return size_stencil_inner_boundary * grid_.ntheta(); } - throw std::out_of_range("Invalid index for nnz_circle_Asc"); -} - -int SmootherGive::getCircleAscIndex(const int i_r, const int i_theta) const -{ - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; - const int size_stencil_interior = 3; - - if (i_r > 0) { - return size_stencil_interior * i_theta; - } else { - return size_stencil_inner_boundary * i_theta; + throw std::out_of_range("getNonZeroCountCircleAsc: Only i_r = 0 implemented."); } } -int SmootherGive::getNonZeroCountRadialAsc(const int i_theta) const -{ - assert(i_theta >= 0 && i_theta < grid_.ntheta()); - - const int size_stencil_next_circluar_smoothing = 2; - const int size_stencil_interior = 3; - const int size_stencil_next_outer_boundary = 2; - const int size_stencil_outer_boundary = 1; - - assert(grid_.lengthSmootherRadial() >= 3); - - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior + - size_stencil_next_outer_boundary + size_stencil_outer_boundary; -} - -int SmootherGive::getRadialAscIndex(const int i_r, const int i_theta) const +int SmootherGive::getCircleAscIndex(int i_r, int i_theta) const { - assert(i_theta >= 0 && i_theta < grid_.ntheta()); - - const int size_stencil_next_circluar_smoothing = 2; - const int size_stencil_interior = 3; - const int size_stencil_next_outer_boundary = 2; - const int size_stencil_outer_boundary = 1; - - assert(grid_.lengthSmootherRadial() >= 3); - assert(grid_.numberSmootherCircles() >= 2); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - if (i_r > numberSmootherCircles && i_r < grid_.nr() - 2) { - return size_stencil_next_circluar_smoothing + (i_r - numberSmootherCircles - 1) * size_stencil_interior; - } - else if (i_r == numberSmootherCircles) { - return 0; - } - else if (i_r == grid_.nr() - 2) { - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior; + if (i_r == 0) { + const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; + return size_stencil_inner_boundary * i_theta; } - else if (i_r == grid_.nr() - 1) { - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior + - size_stencil_next_outer_boundary; + else { + throw std::out_of_range("getCircleAscIndex: Only i_r = 0 implemented."); } - throw std::out_of_range("Invalid index for stencil"); } diff --git a/src/Smoother/SmootherGive/smootherGive.cpp b/src/Smoother/SmootherGive/smootherGive.cpp index 9a99c5c9..db458ad0 100644 --- a/src/Smoother/SmootherGive/smootherGive.cpp +++ b/src/Smoother/SmootherGive/smootherGive.cpp @@ -4,6 +4,8 @@ SmootherGive::SmootherGive(const PolarGrid& grid, const LevelCache& level_cache, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : Smoother(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) + , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS @@ -20,7 +22,138 @@ SmootherGive::~SmootherGive() #endif } +// The smoothing solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. void SmootherGive::smoothing(Vector x, ConstVector rhs, Vector temp) { - smoothingForLoop(x, rhs, temp); + assert(x.size() == rhs.size()); + assert(temp.size() == rhs.size()); + + Kokkos::deep_copy(temp, rhs); + + /* Multi-threaded execution */ + const int num_smoother_circles = grid_.numberSmootherCircles(); + const int num_radial_lines = grid_.ntheta(); + + /* ----------------------------------------------- */ + /* 1. Black-Circle update (u_bc): */ + /* A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 2) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 1)*/ +#pragma omp for + for (int circle_task = -1; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 2)*/ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + } + solveBlackCircleSection(x, temp); + + /* ----------------------------------------------- */ + /* 2. White-Circle update (u_wc): */ + /* A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside White Section */ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 2) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + /* Outside White Section (Part 1)*/ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + /* Outside White Section (Part 2)*/ +#pragma omp for + for (int circle_task = 2; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + } + solveWhiteCircleSection(x, temp); + + /* ----------------------------------------------- */ + /* 3. Black-Radial update (u_br): */ + /* A_br * u_br = f_br − A_br^ortho * u_br^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 3; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + } + solveBlackRadialSection(x, temp); + + /* ----------------------------------------------- */ + /* 4. White-Radial update (u_wr): */ + /* A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 2; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + } + solveWhiteRadialSection(x, temp); } diff --git a/src/Smoother/SmootherGive/solveAscSystem.cpp b/src/Smoother/SmootherGive/solveAscSystem.cpp new file mode 100644 index 00000000..b84b9e80 --- /dev/null +++ b/src/Smoother/SmootherGive/solveAscSystem.cpp @@ -0,0 +1,115 @@ +#include "../../../include/Smoother/SmootherGive/smootherGive.h" + +void SmootherGive::solveBlackCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + bool is_inner_circle_black = grid_.numberSmootherCircles() % 2 != 0; + + int batch_offset = is_inner_circle_black ? 2 : 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + + if (is_inner_circle_black) { +#ifdef GMGPOLAR_USE_MUMPS + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } +#else + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); +#endif + } + + // Move updated values to x + int start_black_circles = is_inner_circle_black ? 0 : 1; +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherGive::solveWhiteCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + bool is_inner_circle_white = grid_.numberSmootherCircles() % 2 == 0; + + int batch_offset = is_inner_circle_white ? 2 : 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + + if (is_inner_circle_white) { +#ifdef GMGPOLAR_USE_MUMPS + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } +#else + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); +#endif + } + + // Move updated values to x + int start_white_circles = is_inner_circle_white ? 0 : 1; +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherGive::solveBlackRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 0; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherGive::solveWhiteRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} \ No newline at end of file