diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h index 2a5cf95a..9d664335 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h @@ -2,32 +2,91 @@ #include "../extrapolatedSmoother.h" +// The extrapolated smoother implements the coupled circle-radial smoothing with coarse node elimination. +// Coarse nodes are excluded from the smoothing iteration by: +// 1. Setting their diagonal entries in A_sc to 1.0 (identity mapping) +// 2. Setting their corresponding right-hand side values to the node's current value +// This effectively fixes coarse nodes in place while allowing fine nodes to be smoothed. +// Additionally, all fine-coarse coupling terms are moved from A_sc to A_sc^ortho. +// As a result, coarse node rows and columns become purely diagonal (losing their tridiagonal structure). +// +// It performs iterative updates on different parts of the grid based +// on the circle/radial section of the grid and black/white line coloring. +// +// The smoother solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. +// - Using 'temp' isn't strictly necessary as all updates could be performed in place in 'x'. +// - The stencil is applied using the A-Give method. +// +// Solver and matrix structure: +// - The matrix A_sc is block tridiagonal due to the smoother-based +// grid indexing, which allows efficient line-wise factorization. +// - The inner boundary requires special handling because it +// contains an additional across-origin coupling, making it +// non-tridiagonal; therefore, a more general solver is used there. +// When using the MUMPS solver, the matrix is assembled in COO format. +// When using the in-house solver, the matrix is stored in CSR format. +// - Circular line matrices are cyclic tridiagonal due to angular +// periodicity, whereas radial line matrices are strictly tridiagonal. +// - Dirichlet boundary contributions in radial matrices are shifted +// into the right-hand side to make A_sc symmetric. + class ExtrapolatedSmootherGive : public ExtrapolatedSmoother { public: + // Constructs the coupled circle-radial extrapolated smoother. + // Builds the A_sc smoother matrices and prepares the solvers. explicit ExtrapolatedSmootherGive(const PolarGrid& grid, const LevelCache& level_cache, const DomainGeometry& domain_geometry, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); + // If MUMPS is enabled, this cleans up the inner boundary solver. ~ExtrapolatedSmootherGive() override; + // Performs one full coupled extrapolated smoothing sweep: + // BC -> WC -> BR -> WR + // Parallel implementation using OpenMP: + // Scedule every 2nd/4th line in parallel to avoid race conditions arising from the A-Give distribution. + // Sceduling every 3rd line in parallel would also be possible, but is less natural for the 2 coloring. void extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) override; private: - void extrapolatedSmoothingSequential(Vector x, ConstVector rhs, Vector temp); - void extrapolatedSmoothingForLoop(Vector x, ConstVector rhs, Vector temp); - - // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix - // 'inner_boundary_circle_matrix_' due to the across-origin treatment. - // It isn't tridiagonal and thus it requires a more advanced solver. - // Note that circle_tridiagonal_solver_[0] is thus unused! - - // Lines containing coarse nodes are purely diagonal and thus are not stored in tridiagonal format. - // - 'circle_tridiagonal_solver_[index] refers to the circular line i_r = 2*index, - // - 'circle_diagonal_solver_[index] refers to the circular line i_r = 2*index + 1, - // - 'radial_tridiagonal_solver_[index] refers to the radial line i_theta = 2*index, - // - 'radial_diagonal_solver_[index] refers to the radial line i_theta = 2*index + 1. + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial circle line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; DMUMPS_STRUC_C inner_boundary_mumps_solver_; @@ -35,12 +94,25 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; #endif + // Sparse matrix for the non-tridiagonal inner boundary circle block. MatrixType inner_boundary_circle_matrix_; - std::vector> circle_diagonal_solver_; - std::vector> radial_diagonal_solver_; - std::vector> circle_tridiagonal_solver_; - std::vector> radial_tridiagonal_solver_; + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------- */ + /* Stencil definitions */ + /* ------------------- */ + + // Stencils encode neighborhood connectivity for A_sc matrix assembly. + // It is only used in the construction of COO/CSR matrices. + // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. + // The Stencil class stores the offset for each position. + // - Non-zero matrix indicesare obtained via `ptr + offset` + // - A offset value of `-1` means the position is not included in the stencil pattern. + // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices. // clang-format off Stencil stencil_center_ = { @@ -55,36 +127,66 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother }; // clang-format on - const Stencil& getStencil(int i_r, int i_theta) const; - int getNonZeroCountCircleAsc(const int i_r) const; - int getNonZeroCountRadialAsc(const int i_theta) const; + // Select correct stencil depending on the grid position. + const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ + // Number of nonzero A_sc entries. + int getNonZeroCountCircleAsc(int i_r) const; /* Only i_r = 0 implemented */ + // Obtain a ptr to index into COO matrices. + // It accumulates all stencil sizes within a line up to, but excluding the current node. + int getCircleAscIndex(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ - int getCircleAscIndex(const int i_r, const int i_theta) const; - int getRadialAscIndex(const int i_r, const int i_theta) const; + /* --------------- */ + /* Matrix assembly */ + /* --------------- */ + // Build all A_sc matrices for circle and radial smoothers. void buildAscMatrices(); - void buildAscCircleSection(const int i_r); - void buildAscRadialSection(const int i_theta); - - void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, + // Build A_sc matrix block for a single circular line. + void buildAscCircleSection(int i_r); + // Build A_sc matrix block for a single radial line. + void buildAscRadialSection(int i_theta); + // Build A_sc for a specific node (i_r, i_theta) + void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& inner_boundary_circle_matrix, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, + double art, double detDF, double coeff_beta); + + /* ---------------------- */ + /* Orthogonal application */ + /* ---------------------- */ + + // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) + // where x = u_sc and rhs = f_sc + void applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, + void applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void solveCircleSection(const int i_r, Vector x, Vector temp, Vector solver_storage_1, - Vector solver_storage_2); - void solveRadialSection(const int i_theta, Vector x, Vector temp, Vector solver_storage); - + /* ----------------- */ + /* Line-wise solvers */ + /* ----------------- */ + + // Solve the linear system: + // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho + // Parameter mapping: + // x = u_sc (solution vector for section s and color c) + // temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) + // where: + // s in {Circle, Radial} denotes the smoother section type, + // c in {Black, White} denotes the line coloring. + void solveBlackCircleSection(Vector x, Vector temp); + void solveWhiteCircleSection(Vector x, Vector temp); + void solveBlackRadialSection(Vector x, Vector temp); + void solveWhiteRadialSection(Vector x, Vector temp); + + /* ----------------------------------- */ + /* Initialize and destroy MUMPS solver */ + /* ----------------------------------- */ #ifdef GMGPOLAR_USE_MUMPS + // Initialize sparse MUMPS solver with assembled COO matrix. void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); + // Release MUMPS internal memory and MPI structures. void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); #endif - - void nodeBuildSmootherGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_diagonal_solver, - std::vector>& radial_diagonal_solver, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, double arr, - double att, double art, double detDF, double coeff_beta); }; diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 3ea507c2..3025dfbe 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -23,6 +23,20 @@ class BatchedTridiagonalSolver assign(sub_diagonal_, T(0)); } + /* ------------------- */ + /* Accessors for sizes */ + /* ------------------- */ + + int matrixDimension() const + { + return matrix_dimension_; + } + + int batchCount() const + { + return batch_count_; + } + /* ---------------------------- */ /* Accessors for matrix entries */ /* ---------------------------- */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 60e5fbd3..f22c29a4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -140,11 +140,12 @@ set(EXTRAPOLATED_SMOOTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/extrapolatedSmoother.cpp # ExtrapolatedSmootherGive + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp # ExtrapolatedSmootherTake ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp similarity index 74% rename from src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp rename to src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp index 2ef11a21..eab23224 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp @@ -788,8 +788,7 @@ static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const Polar /* but is shifted over to the rhs to make the radial Asc smoother matrices symmetric. */ /* Note that the circle Asc smoother matrices are symmetric by default. */ /* Note that rhs[right] contains the correct boundary value of u_D. */ - result[center] -= /* Right: Symmetry shift! */ - -coeff2 * arr * rhs[right]; + result[center] -= -coeff2 * arr * rhs[right]; /* Right: Symmetry shift! */ } else { /* ---------------|| */ @@ -885,9 +884,8 @@ static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const Polar } } -void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, - ConstVector x, ConstVector rhs, - Vector temp) +void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector x, + ConstVector rhs, Vector temp) { assert(i_r >= 0 && i_r < grid_.numberSmootherCircles() + 1); @@ -906,7 +904,7 @@ void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(const int i_r, const S } } -void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, +void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp) { @@ -924,295 +922,4 @@ void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(const int i_theta, con nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, detDF, coeff_beta); } -} - -void ExtrapolatedSmootherGive::solveCircleSection(const int i_r, Vector x, Vector temp, - Vector solver_storage_1, Vector solver_storage_2) -{ - const int start = grid_.index(i_r, 0); - const int end = start + grid_.ntheta(); - if (i_r == 0) { -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = temp.data() + start; - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } -#else - inner_boundary_lu_solver_.solveInPlace(temp.data()); -#endif - } - else { - if (i_r & 1) { - circle_tridiagonal_solver_[i_r / 2].solveInPlace(temp.data() + start, solver_storage_1.data(), - solver_storage_2.data()); - } - else { - circle_diagonal_solver_[i_r / 2].solveInPlace(temp.data() + start); - } - } - // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); -} - -void ExtrapolatedSmootherGive::solveRadialSection(const int i_theta, Vector x, Vector temp, - Vector solver_storage) -{ - const int start = grid_.index(grid_.numberSmootherCircles(), i_theta); - const int end = start + grid_.lengthSmootherRadial(); - if (i_theta & 1) { - radial_tridiagonal_solver_[i_theta / 2].solveInPlace(temp.data() + start, solver_storage.data()); - } - else { - radial_diagonal_solver_[i_theta / 2].solveInPlace(temp.data() + start); - } - // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); -} - -// Quick overview: -// Step 1: temp = rhs - Asc_ortho(x) -// Step 2: Solve in place -// Step 3: Update x - -/* ------------------ */ -/* Sequential Version */ -/* ------------------ */ - -void ExtrapolatedSmootherGive::extrapolatedSmoothingSequential(Vector x, ConstVector rhs, - Vector temp) -{ - assert(x.size() == rhs.size()); - assert(temp.size() == rhs.size()); - - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - const int index = grid_.index(i_r, i_theta); - temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; - } - } - - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - const int index = grid_.index(i_r, i_theta); - temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; - } - } - - /* Single-threaded execution */ - Vector circle_solver_storage_1("circle_solver_storage1", grid_.ntheta()); - Vector circle_solver_storage_2("circle_solver_storage1", grid_.ntheta()); - Vector radial_solver_storage("radial_solver_storage", grid_.lengthSmootherRadial()); - - /* ---------------------------- */ - /* ------ CIRCLE SECTION ------ */ - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - for (int i_r = 0; i_r < grid_.numberSmootherCircles() + 1; i_r++) { - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - /* ---------------------------- */ - /* ------ RADIAL SECTION ------ */ - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } -} - -/* ------------------------------------ */ -/* Parallelization Version 1: For Loops */ -/* ------------------------------------ */ - -// clang-format off -void ExtrapolatedSmootherGive::extrapolatedSmoothingForLoop(Vector x, ConstVector rhs, - Vector temp) -{ - assert(x.size() == rhs.size()); - assert(temp.size() == rhs.size()); - - if (num_omp_threads_ == 1) { - extrapolatedSmoothingSequential(x, rhs, temp); - } - else { - #pragma omp parallel num_threads(num_omp_threads_) - { - #pragma omp for nowait - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - const int index = grid_.index(i_r, i_theta); - temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; - } - } - #pragma omp for - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - const int index = grid_.index(i_r, i_theta); - temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; - } - } - } - - /* Multi-threaded execution */ - const int num_circle_tasks = grid_.numberSmootherCircles(); - const int num_radial_tasks = grid_.ntheta(); - - #pragma omp parallel - { - Vector circle_solver_storage_1("circle_solver_storage_1",grid_.ntheta()); - Vector circle_solver_storage_2("circle_solver_storage_2",grid_.ntheta()); - Vector radial_solver_storage("radial_solver_storage",grid_.lengthSmootherRadial()); - - /* ---------------------------- */ - /* ------ CIRCLE SECTION ------ */ - /* ---------------------------- */ - - /* ---------------------------- */ - /* Asc ortho Black Circle Tasks */ - - /* Inside Black Section */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 1)*/ - #pragma omp for - for (int circle_task = -1; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 2)*/ - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - - /* Black Circle Smoother */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Inside White Section */ - #pragma omp for nowait - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Inside Black Section */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Outside White Section (Part 1)*/ - #pragma omp for nowait - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Outside Black Section (Part 1) */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - /* Outside White Section (Part 2)*/ - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 4) { - int i_r = num_circle_tasks - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* ---------------------------- */ - /* Asc ortho Black Radial Tasks */ - /* Outside Black Section (Part 2) */ - #pragma omp for - for (int radial_task = 3; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - - /* White Circle Smoother */ - #pragma omp for nowait - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 2) { - int i_r = num_circle_tasks - circle_task - 1; - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - /* Black Radial Smoother */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - - /* ---------------------------- */ - /* Asc ortho White Circle Tasks */ - - /* Inside White Section */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 1) */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 2) */ - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 4) { - int i_theta = radial_task; - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - - /* White Radial Smoother */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 2) { - int i_theta = radial_task; - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } - } - } -} +} \ No newline at end of file diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp index 7b35feb7..ccd0cf46 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp @@ -3,21 +3,15 @@ #include "../../../include/Definitions/geometry_helper.h" /* Tridiagonal matrices */ -static inline void updateTridiagonalElement(SymmetricTridiagonalSolver& matrix, int row, int column, - double value) +static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, + double value) { if (row == column) - matrix.main_diagonal(row) += value; + solver.main_diagonal(batch, row) += value; else if (row == column - 1) - matrix.sub_diagonal(row) += value; - else if (row == 0 && column == matrix.columns() - 1) - matrix.cyclic_corner_element() += value; -} - -/* Diagonal matrices */ -static inline void updateDiagonalElement(DiagonalSolver& matrix, int row, int column, double value) -{ - matrix.diagonal(row) += value; + solver.sub_diagonal(batch, row) += value; + else if (row == 0 && column == solver.matrixDimension() - 1) + solver.cyclic_corner(batch) += value; } /* Inner Boundary COO/CSR matrix */ @@ -38,13 +32,11 @@ static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, in } #endif -void ExtrapolatedSmootherGive::nodeBuildSmootherGive( - int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_diagonal_solver, - std::vector>& radial_diagonal_solver, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, double arr, double att, double art, - double detDF, double coeff_beta) +void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& inner_boundary_circle_matrix, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, + double att, double art, double detDF, double coeff_beta) { assert(i_r >= 0 && i_r < grid.nr()); assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -80,6 +72,14 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int right_index = i_theta; int bottom_index = i_theta_M1; int top_index = i_theta_P1; + + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& left_solver = circle_tridiagonal_solver; + int left_batch = i_r - 1; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; + /* -------------------------- */ /* Cyclic Tridiagonal Section */ /* i_r % 2 == 1 */ @@ -98,52 +98,48 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | | */ /* | o | o | o | */ - auto& center_matrix = circle_tridiagonal_solver[i_r / 2]; - auto& left_matrix = circle_diagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = circle_diagonal_solver[(i_r + 1) / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = bottom_index; value = -coeff3 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = top_index; value = -coeff4 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = center_index; value = -coeff3 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = center_index; value = -coeff4 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); if (i_theta & 1) { /* i_theta % 2 == 1 */ @@ -172,14 +168,14 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateDiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); } /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateDiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } } /* ---------------- */ @@ -200,52 +196,48 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | | */ /* | o | o | o | */ - auto& center_matrix = circle_diagonal_solver[i_r / 2]; - auto& left_matrix = circle_tridiagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = circle_tridiagonal_solver[(i_r + 1) / 2]; - if (i_theta & 1) { /* i_theta % 2 == 1 */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } else { /* i_theta % 2 == 0 */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 1.0; - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } } /* ------------------------------------------ */ @@ -270,6 +262,14 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int right_index = i_r - numberSmootherCircles + 1; int bottom_index = i_r - numberSmootherCircles; int top_index = i_r - numberSmootherCircles; + + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; + /* ------------------- */ /* Tridiagonal Section */ /* i_theta % 2 == 1 */ @@ -292,52 +292,48 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* o x o */ /* ---------- */ - auto& center_matrix = radial_tridiagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_diagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_diagonal_solver[i_theta_P1 / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = left_index; value = -coeff1 * arr; /* Left */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = right_index; value = -coeff2 * arr; /* Right */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = center_index; value = -coeff1 * arr; /* Right */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = center_index; value = -coeff2 * arr; /* Left */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); if (i_r & 1) { /* i_r % 2 == 1 */ /* ---------- */ @@ -351,13 +347,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateDiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateDiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } } /* ---------------- */ @@ -382,57 +378,57 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* o o o */ /* ---------- */ - auto& center_matrix = radial_diagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1 / 2]; if (i_r & 1) { /* i_r % 2 == 1 */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } else { /* i_r % 2 == 0 */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 1.0; - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } } /* ------------------------------------------ */ /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; + /* ------------------------------------------------ */ /* Case 1: Dirichlet boundary on the inner boundary */ /* ------------------------------------------------ */ @@ -447,9 +443,6 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - auto& center_matrix = inner_boundary_circle_matrix; - auto& right_matrix = circle_tridiagonal_solver[(i_r + 1) / 2]; - int center_index = i_theta; int right_index = i_theta; int bottom_index = i_theta_M1; @@ -464,13 +457,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = CenterStencil[StencilPosition::Center]; col = center_index; val = 1.0; - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } else { /* ------------------------------------------------------------- */ @@ -515,9 +508,6 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* -| | | | */ /* -| x | o | x | */ - auto& center_matrix = inner_boundary_circle_matrix; - auto& right_matrix = circle_tridiagonal_solver[(i_r + 1) / 2]; - auto& left_matrix = inner_boundary_circle_matrix; /* Fill matrix row of (i,j) */ row = center_index; ptr = center_nz_index; @@ -525,17 +515,17 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = CenterStencil[StencilPosition::Center]; col = center_index; val = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); offset = CenterStencil[StencilPosition::Left]; col = left_index; val = -coeff1 * arr; /* Left */ - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); offset = CenterStencil[StencilPosition::Center]; col = center_index; val = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i-1,j) */ /* From view the view of the across origin node, */ @@ -548,18 +538,18 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = LeftStencil[StencilPosition::Left]; col = center_index; val = -coeff1 * arr; /* Right -> Left*/ - updateCOOCSRMatrixElement(left_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); offset = LeftStencil[StencilPosition::Center]; col = left_index; val = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ - updateCOOCSRMatrixElement(left_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } else { /* i_theta % 2 == 0 */ @@ -569,9 +559,6 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* -| | | | */ /* -| o | o | o | */ - auto& center_matrix = inner_boundary_circle_matrix; - auto& right_matrix = circle_tridiagonal_solver[(i_r + 1) / 2]; - auto& left_matrix = inner_boundary_circle_matrix; /* Fill matrix row of (i,j) */ row = center_index; ptr = center_nz_index; @@ -579,7 +566,7 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = CenterStencil[StencilPosition::Center]; col = center_index; val = 1.0; - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i,j-1) */ row = bottom_index; @@ -590,7 +577,7 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = BottomStencil[StencilPosition::Center]; col = bottom_index; val = +coeff3 * att; /* Center: (Top) */ - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i,j+1) */ row = top_index; @@ -601,13 +588,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( offset = TopStencil[StencilPosition::Center]; col = top_index; val = +coeff4 * att; /* Center: (Bottom) */ - updateCOOCSRMatrixElement(center_matrix, ptr, offset, row, col, val); + updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } } } @@ -636,6 +623,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int bottom_index = i_theta_M1; int top_index = i_theta_P1; + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& left_solver = circle_tridiagonal_solver; + int left_batch = i_r - 1; + auto& right_solver = radial_tridiagonal_solver; + int right_batch = i_theta; + if (i_r & 1) { if (i_theta & 1) { /* i_r % 2 == 1 and i_theta % 2 == 1 */ @@ -645,64 +639,60 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | x | o || x o x o */ - auto& center_matrix = circle_tridiagonal_solver[i_r / 2]; - auto& left_matrix = circle_diagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = radial_tridiagonal_solver[i_theta / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = bottom_index; value = -coeff3 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = top_index; value = -coeff4 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = center_index; value = -coeff3 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = center_index; value = -coeff4 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateDiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } else { /* i_r % 2 == 1 and i_theta % 2 == 0 */ @@ -712,52 +702,48 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | o | o || o o o o */ - auto& center_matrix = circle_tridiagonal_solver[i_r / 2]; - auto& left_matrix = circle_diagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = radial_diagonal_solver[i_theta / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = bottom_index; value = -coeff3 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = top_index; value = -coeff4 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = center_index; value = -coeff3 * att; /* Top */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = center_index; value = -coeff4 * att; /* Bottom */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } } else { @@ -769,32 +755,28 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | x | o | x || o x o x */ - auto& center_matrix = circle_diagonal_solver[i_r / 2]; - auto& left_matrix = circle_tridiagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = radial_tridiagonal_solver[i_theta / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } else { /* i_r % 2 == 0 and i_theta % 2 == 0 */ @@ -804,39 +786,35 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | o | o || o o o o */ - auto& center_matrix = circle_diagonal_solver[i_r / 2]; - auto& left_matrix = circle_tridiagonal_solver[(i_r - 1) / 2]; - auto& right_matrix = radial_diagonal_solver[i_theta / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 1.0; - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateDiagonalElement(right_matrix, row, column, value); + updateMatrixElement(right_solver, right_batch, row, column, value); } } } @@ -863,6 +841,15 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( const int bottom_index = i_r - numberSmootherCircles; const int top_index = i_r - numberSmootherCircles; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; + auto& left_solver = circle_tridiagonal_solver; + int left_batch = i_r - 1; + if (i_theta & 1) { if (i_r & 1) { /* i_theta % 2 == 1 and i_r % 2 == 1 */ @@ -872,55 +859,50 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | x | o | x || o x o x */ - auto& center_matrix = radial_tridiagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_diagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_diagonal_solver[i_theta_P1 / 2]; - auto& left_matrix = circle_diagonal_solver[(i_r - 1) / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = right_index; value = -coeff2 * arr; /* Right */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateDiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = center_index; value = -coeff2 * arr; /* Left */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateDiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateDiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } else { /* i_theta % 2 == 1 and i_r % 2 == 0 */ @@ -930,43 +912,38 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | x | o || x o x o */ - auto& center_matrix = radial_tridiagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_diagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_diagonal_solver[i_theta_P1 / 2]; - auto& left_matrix = circle_tridiagonal_solver[(i_r - 1) / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = right_index; value = -coeff2 * arr; /* Right */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = center_index; value = -coeff2 * arr; /* Left */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } } else { @@ -978,33 +955,28 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | o | o || o o o o */ - auto& center_matrix = radial_diagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1 / 2]; - auto& left_matrix = circle_diagonal_solver[(i_r - 1) / 2]; - /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } else { /* i_theta % 2 == 0 and i_r % 2 == 0 */ @@ -1014,41 +986,35 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* | | | || -------------- */ /* | o | o | o || o o o o */ - auto& center_matrix = radial_diagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1 / 2]; - auto& left_matrix = circle_tridiagonal_solver[(i_r - 1) / 2]; - /* Fill matrix row of (i,j) */ - row = center_index; column = center_index; value = 1.0; - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(left_matrix, row, column, value); + updateMatrixElement(left_solver, left_batch, row, column, value); /* Fill matrix row of (i+1,j) */ row = right_index; column = right_index; value = coeff2 * arr; /* Center: (Left) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } } } @@ -1077,6 +1043,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int bottom_index = i_r - numberSmootherCircles; int top_index = i_r - numberSmootherCircles; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + auto& bottom_solver = radial_tridiagonal_solver; + int bottom_batch = i_theta_M1; + auto& top_solver = radial_tridiagonal_solver; + int top_batch = i_theta_P1; + if (i_theta & 1) { /* i_theta % 2 == 1 */ /* ---------------|| */ @@ -1086,37 +1059,35 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* ---------------|| */ /* o x o x || */ /* ---------------|| */ - auto& center_matrix = radial_tridiagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_diagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_diagonal_solver[i_theta_P1 / 2]; + /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = left_index; value = -coeff1 * arr; /* Left */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Remark: Right is not included here due to the symmetry shift */ row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = center_index; value = -coeff1 * arr; /* Right */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i+1,j) */ /* Nothing to be done */ @@ -1125,13 +1096,13 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateDiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateDiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } else { /* i_theta % 2 == 0 */ @@ -1142,31 +1113,29 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* ---------------|| */ /* o o o o || */ /* ---------------|| */ - auto& center_matrix = radial_diagonal_solver[i_theta / 2]; - auto& bottom_matrix = radial_tridiagonal_solver[i_theta_M1 / 2]; - auto& top_matrix = radial_tridiagonal_solver[i_theta_P1 / 2]; + /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; column = center_index; value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i,j-1) */ row = bottom_index; column = bottom_index; value = coeff3 * att; /* Center: (Top) */ - updateTridiagonalElement(bottom_matrix, row, column, value); + updateMatrixElement(bottom_solver, bottom_batch, row, column, value); /* Fill matrix row of (i,j+1) */ row = top_index; column = top_index; value = coeff4 * att; /* Center: (Bottom) */ - updateTridiagonalElement(top_matrix, row, column, value); + updateMatrixElement(top_solver, top_batch, row, column, value); } } /* ------------------------------------------ */ @@ -1184,6 +1153,9 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( int center_index = i_r - numberSmootherCircles; int left_index = i_r - numberSmootherCircles - 1; + auto& center_solver = radial_tridiagonal_solver; + int center_batch = i_theta; + if (i_theta & 1) { /* i_theta % 2 == 1 */ /* -----------|| */ @@ -1193,19 +1165,18 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* -----------|| */ /* x o x || */ /* -----------|| */ - auto& center_matrix = radial_tridiagonal_solver[i_theta / 2]; /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 1.0; - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateTridiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } else { /* i_theta % 2 == 0 */ @@ -1216,19 +1187,18 @@ void ExtrapolatedSmootherGive::nodeBuildSmootherGive( /* -----------|| */ /* o o o || */ /* -----------|| */ - auto& center_matrix = radial_diagonal_solver[i_theta / 2]; /* Fill matrix row of (i,j) */ row = center_index; column = center_index; value = 1.0; - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ - updateDiagonalElement(center_matrix, row, column, value); + updateMatrixElement(center_solver, center_batch, row, column, value); } } } @@ -1244,9 +1214,8 @@ void ExtrapolatedSmootherGive::buildAscCircleSection(const int i_r) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildSmootherGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_diagonal_solver_, radial_diagonal_solver_, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, + circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } @@ -1261,205 +1230,112 @@ void ExtrapolatedSmootherGive::buildAscRadialSection(const int i_theta) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildSmootherGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_diagonal_solver_, radial_diagonal_solver_, circle_tridiagonal_solver_, - radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, + circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } -// clang-format off void ExtrapolatedSmootherGive::buildAscMatrices() { /* -------------------------------------- */ /* Part 1: Allocate Asc Smoother matrices */ /* -------------------------------------- */ + // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. + // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - const int number_smoother_circles = grid_.numberSmootherCircles(); - const int length_smoother_radial = grid_.lengthSmootherRadial(); - - const int num_circle_nodes = grid_.ntheta(); - circle_tridiagonal_solver_.resize(number_smoother_circles / 2); - circle_diagonal_solver_.resize(number_smoother_circles - number_smoother_circles / 2); - - assert((grid_.ntheta() / 2) % 2 == 0); - const int num_radial_nodes = length_smoother_radial; - radial_tridiagonal_solver_.resize(grid_.ntheta() / 2); - radial_diagonal_solver_.resize(grid_.ntheta() / 2); - - // Remark: circle_diagonal_solver_[0] is undefnied. - // Use inner_boundary_circle_matrix_ instead. - #pragma omp parallel num_threads(num_omp_threads_) if (grid_.numberOfNodes() > 10'000) - { - // ---------------- // - // Circular Section // - #pragma omp for nowait - for (int circle_Asc_index = 0; circle_Asc_index < number_smoother_circles; circle_Asc_index++) { - - /* Inner boundary circle */ - if (circle_Asc_index == 0) { - #ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int nnz = getNonZeroCountCircleAsc(circle_Asc_index); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, nnz); - inner_boundary_circle_matrix_.is_symmetric(false); - for (int i = 0; i < nnz; i++) { - inner_boundary_circle_matrix_.value(i) = 0.0; - } - #else - std::function nnz_per_row = [&](int i_theta) { - if(DirBC_Interior_) return 1; - else return i_theta % 2 == 0 ? 1 : 2; - }; - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); - for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) { - inner_boundary_circle_matrix_.values_data()[i] = 0.0; - } - #endif - } - - /* Interior Circle Section */ - else { - if (circle_Asc_index & 1) { - const int circle_tridiagonal_solver_index = circle_Asc_index / 2; - auto& solver_matrix = circle_tridiagonal_solver_[circle_tridiagonal_solver_index]; - - solver_matrix = SymmetricTridiagonalSolver(num_circle_nodes); - solver_matrix.is_cyclic(true); - solver_matrix.cyclic_corner_element() = 0.0; - - for (int i = 0; i < num_circle_nodes; i++) { - solver_matrix.main_diagonal(i) = 0.0; - if (i < num_circle_nodes - 1) { - solver_matrix.sub_diagonal(i) = 0.0; - } - } - } - else { - const int circle_diagonal_solver_index = circle_Asc_index / 2; - auto& solver_matrix = circle_diagonal_solver_[circle_diagonal_solver_index]; - - solver_matrix = DiagonalSolver(num_circle_nodes); - - for (int i = 0; i < num_circle_nodes; i++) { - solver_matrix.diagonal(i) = 0.0; - } - } - } - } - - // -------------- // - // Radial Section // - #pragma omp for nowait - for (int radial_Asc_index = 0; radial_Asc_index < grid_.ntheta(); radial_Asc_index++) { - - if (radial_Asc_index & 1) { - const int radial_tridiagonal_solver_index = radial_Asc_index / 2; - auto& solver_matrix = radial_tridiagonal_solver_[radial_tridiagonal_solver_index]; - - solver_matrix = SymmetricTridiagonalSolver(num_radial_nodes); - solver_matrix.is_cyclic(false); - - for (int i = 0; i < num_radial_nodes; i++) { - solver_matrix.main_diagonal(i) = 0.0; - if (i < num_radial_nodes - 1) { - solver_matrix.sub_diagonal(i) = 0.0; - } - } - } - else { - const int radial_diagonal_solver_index = radial_Asc_index / 2; - auto& solver_matrix = radial_diagonal_solver_[radial_diagonal_solver_index]; - - solver_matrix = DiagonalSolver(num_radial_nodes); - - for (int i = 0; i < num_radial_nodes; i++) { - solver_matrix.diagonal(i) = 0.0; - } - } - } +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int inner_i_r = 0; + const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); + inner_boundary_circle_matrix_.is_symmetric(false); +#else + std::function nnz_per_row = [&](int i_theta) { + if (DirBC_Interior_) + return 1; + else + return i_theta % 2 == 0 ? 1 : 2; + }; + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); + + for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) { + inner_boundary_circle_matrix_.values_data()[i] = 0.0; } +#endif + /* ---------------------------------- */ /* Part 2: Fill Asc Smoother matrices */ /* ---------------------------------- */ - if (num_omp_threads_ == 1) { - /* Single-threaded execution */ - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + /* Multi-threaded execution: */ + const int num_smoother_circles = grid_.numberSmootherCircles(); + const int additional_radial_tasks = grid_.ntheta() % 3; + const int num_radial_tasks = grid_.ntheta() - additional_radial_tasks; + +#pragma omp parallel num_threads(num_omp_threads_) + { +#pragma omp for + for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { buildAscCircleSection(i_r); } - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); +#pragma omp for + for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { + buildAscCircleSection(i_r); + } +#pragma omp for + for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { + buildAscCircleSection(i_r); } - } - else { - /* Multi-threaded execution: For Loops */ - const int num_circle_tasks = grid_.numberSmootherCircles(); - const int additional_radial_tasks = grid_.ntheta() % 3; - const int num_radial_tasks = grid_.ntheta() - additional_radial_tasks; - - #pragma omp parallel num_threads(num_omp_threads_) - { - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid_.numberSmootherCircles() - circle_task - 1; - buildAscCircleSection(i_r); - } - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(0); - } - else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); - } - } +#pragma omp for + for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { + if (radial_task > 0) { + int i_theta = radial_task + additional_radial_tasks; + buildAscRadialSection(i_theta); } - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + else { + if (additional_radial_tasks == 0) { + buildAscRadialSection(0); } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(1); - } - else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); - } - else if (additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); - } + else if (additional_radial_tasks >= 1) { + buildAscRadialSection(0); + buildAscRadialSection(1); } } - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + } +#pragma omp for + for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { + if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; buildAscRadialSection(i_theta); } + else { + if (additional_radial_tasks == 0) { + buildAscRadialSection(1); + } + else if (additional_radial_tasks == 1) { + buildAscRadialSection(2); + } + else if (additional_radial_tasks == 2) { + buildAscRadialSection(2); + buildAscRadialSection(3); + } + } + } +#pragma omp for + for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + int i_theta = radial_task + additional_radial_tasks; + buildAscRadialSection(i_theta); } } - #ifdef GMGPOLAR_USE_MUMPS + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + +#ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------- */ /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ /* ------------------------------------------------------------------- */ @@ -1485,6 +1361,6 @@ void ExtrapolatedSmootherGive::buildAscMatrices() current_nz++; } } - #endif +#endif } // clang-format on diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp index bfe0dc5b..c3c50019 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp @@ -6,6 +6,8 @@ ExtrapolatedSmootherGive::ExtrapolatedSmootherGive(const PolarGrid& grid, const const bool DirBC_Interior, const int num_omp_threads) : ExtrapolatedSmoother(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) + , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS @@ -22,7 +24,154 @@ ExtrapolatedSmootherGive::~ExtrapolatedSmootherGive() #endif } +// The smoothing solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. void ExtrapolatedSmootherGive::extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) { - extrapolatedSmoothingForLoop(x, rhs, temp); + assert(x.size() == rhs.size()); + assert(temp.size() == rhs.size()); + +#pragma omp parallel num_threads(num_omp_threads_) + { +#pragma omp for nowait + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const int index = grid_.index(i_r, i_theta); + temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; + } + } +#pragma omp for + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + const int index = grid_.index(i_r, i_theta); + temp[index] = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index]; + } + } + } + + /* Multi-threaded execution */ + const int num_smoother_circles = grid_.numberSmootherCircles(); + const int num_radial_lines = grid_.ntheta(); + + /* ----------------------------------------------- */ + /* 1. Black-Circle update (u_bc): */ + /* A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 2) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 1)*/ +#pragma omp for + for (int circle_task = -1; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 2)*/ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } + } + solveBlackCircleSection(x, temp); + + /* ----------------------------------------------- */ + /* 2. White-Circle update (u_wc): */ + /* A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside White Section */ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 2) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + /* Outside White Section (Part 1)*/ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + /* Outside White Section (Part 2)*/ +#pragma omp for + for (int circle_task = 2; circle_task < num_smoother_circles; circle_task += 4) { + int i_r = num_smoother_circles - circle_task - 1; + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } + } + solveWhiteCircleSection(x, temp); + + /* ----------------------------------------------- */ + /* 3. Black-Radial update (u_br): */ + /* A_br * u_br = f_br − A_br^ortho * u_br^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 3; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } + } + solveBlackRadialSection(x, temp); + + /* ----------------------------------------------- */ + /* 4. White-Radial update (u_wr): */ + /* A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho */ + /* ----------------------------------------------- */ +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 2; i_theta < num_radial_lines; i_theta += 4) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } + } + solveWhiteRadialSection(x, temp); } diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp index 78a316fd..79326c35 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp @@ -63,13 +63,3 @@ int ExtrapolatedSmootherGive::getCircleAscIndex(const int i_r, const int i_theta throw std::out_of_range("ptr_nz_index_circle_Asc: Only i_r = 0 implemented."); } - -int ExtrapolatedSmootherGive::getNonZeroCountRadialAsc(const int i_theta) const -{ - throw std::out_of_range("ExtrapolatedSmoother: nnz_radial_Asc not implemented."); -} - -int ExtrapolatedSmootherGive::getRadialAscIndex(const int i_r, const int i_theta) const -{ - throw std::out_of_range("ExtrapolatedSmoother: ptr_nz_index_radial_Asc not implemented."); -} diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp new file mode 100644 index 00000000..68df770e --- /dev/null +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp @@ -0,0 +1,125 @@ +#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" + +void ExtrapolatedSmootherGive::solveBlackCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + bool is_inner_circle_black = grid_.numberSmootherCircles() % 2 != 0; + + if (!is_inner_circle_black) { + int batch_offset = 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + } + else { + int batch_offset = 2; + int batch_stride = 2; + circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + +#ifdef GMGPOLAR_USE_MUMPS + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } +#else + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); +#endif + } + + // Move updated values to x + int start_black_circles = is_inner_circle_black ? 0 : 1; +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void ExtrapolatedSmootherGive::solveWhiteCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + bool is_inner_circle_white = grid_.numberSmootherCircles() % 2 == 0; + + if (!is_inner_circle_white) { + int batch_offset = 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + } + else { + int batch_offset = 2; + int batch_stride = 2; + circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + +#ifdef GMGPOLAR_USE_MUMPS + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } +#else + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); +#endif + } + + // Move updated values to x + int start_white_circles = is_inner_circle_white ? 0 : 1; +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void ExtrapolatedSmootherGive::solveBlackRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 0; + int batch_stride = 2; + radial_tridiagonal_solver_.solve_diagonal(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void ExtrapolatedSmootherGive::solveWhiteRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +}