SciCompMod · julianlitz · Feb 17, 2026 · Feb 4, 2026 · Feb 4, 2026 · Feb 4, 2026
diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h
@@ -23,6 +23,20 @@ class BatchedTridiagonalSolver
         assign(sub_diagonal_, T(0));
     }
 
+    /* ------------------- */
+    /* Accessors for sizes */
+    /* ------------------- */
+
+    int matrixDimension() const
+    {
+        return matrix_dimension_;
+    }
+
+    int batchCount() const
+    {
+        return batch_count_;
+    }
+
     /* ---------------------------- */
     /* Accessors for matrix entries */
     /* ---------------------------- */

diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h
@@ -2,99 +2,180 @@
 
 #include "../smoother.h"
 
+// The smoother implements the coupled circle-radial smoothing procedure.
+// It performs iterative updates on different parts of the grid based
+// on the circle/radial section of the grid and black/white line coloring.
+//
+// The smoother solves linear systems of the form:
+//   A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho
+// where:
+//   - s in {Circle, Radial} denotes the smoother section type,
+//   - c in {Black, White} denotes the coloring (even/odd line sub-system).
+//
+// The update sequence is as follows:
+//   1. Black-Circle update (u_bc):
+//      A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho
+//   2. White-Circle update (u_wc):
+//      A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho
+//   3. Black-Radial update (u_br):
+//      A_br * u_br = f_br − A_br^ortho * u_br^ortho
+//   4. White-Radial update (u_wr):
+//      A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho
+//
+// Algorithm details:
+//   - 'rhs' corresponds to the f vector, 'x' stores the final solution,
+//     and 'temp' is used for temporary storage during updates.
+//   - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho.
+//   - The system is then solved in-place in temp, and the results
+//     are copied back to x.
+//   - Using 'temp' isn't strictly necessary as all updates could be performed in place in 'x'.
+//   - The stencil is applied using the A-Take method.
+//
+// Solver and matrix structure:
+//   - The matrix A_sc is block tridiagonal due to the smoother-based
+//     grid indexing, which allows efficient line-wise factorization.
+//   - The inner boundary requires special handling because it
+//     contains an additional across-origin coupling, making it
+//     non-tridiagonal; therefore, a more general solver is used there.
+//     When using the MUMPS solver, the matrix is assembled in COO format.
+//     When using the in-house solver, the matrix is stored in CSR format.
+//   - Circular line matrices are cyclic tridiagonal due to angular
+//     periodicity, whereas radial line matrices are strictly tridiagonal.
+//   - Dirichlet boundary contributions in radial matrices are shifted
+//     into the right-hand side to make A_sc symmetric.
+
 class SmootherTake : public Smoother
 {
 public:
+    // Constructs the coupled circle-radial smoother.
+    // Builds the A_sc smoother matrices and prepares the solvers.
     explicit SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, const DomainGeometry& domain_geometry,
                           const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior,
                           int num_omp_threads);
+
+    // If MUMPS is enabled, this cleans up the inner boundary solver.
     ~SmootherTake() override;
 
+    // Performs one full coupled smoothing sweep:
+    //   BC -> WC -> BR -> WR
+    // using temp as RHS workspace.
     void smoothing(Vector<double> x, ConstVector<double> rhs, Vector<double> temp) override;
 
 private:
-    // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix
-    // 'inner_boundary_circle_matrix_' due to the across-origin treatment.
-    // It isn't tridiagonal and thus it requires a more advanced solver.
-    // Note that circle_tridiagonal_solver_[0] is thus unused!
-    // Additionally 'circle_tridiagonal_solver_[index]' will refer to the circular line i_r = index and
-    // 'radial_tridiagonal_solver_[index] will refer to the radial line i_theta = index.
+    /* ------------------- */
+    /* Tridiagonal solvers */
+    /* ------------------- */
+
+    // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
+    BatchedTridiagonalSolver<double> circle_tridiagonal_solver_;
+
+    // Batched solver for tridiagonal radial circle line A_sc matrices.
+    BatchedTridiagonalSolver<double> radial_tridiagonal_solver_;
+
+    // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because
+    // it potentially includes across-origin coupling. Therefore, it is assembled
+    // into a sparse matrix and solved using a general-purpose sparse solver.
+    // When using the MUMPS solver, the matrix is assembled in COO format.
+    // When using the in-house solver, the matrix is stored in CSR format.
 #ifdef GMGPOLAR_USE_MUMPS
     using MatrixType = SparseMatrixCOO<double>;
     DMUMPS_STRUC_C inner_boundary_mumps_solver_;
 #else
     using MatrixType = SparseMatrixCSR<double>;
     SparseLUSolver<double> inner_boundary_lu_solver_;
 #endif
+    // Sparse matrix for the non-tridiagonal inner boundary circle block.
     MatrixType inner_boundary_circle_matrix_;
 
-    std::vector<SymmetricTridiagonalSolver<double>> circle_tridiagonal_solver_;
-    std::vector<SymmetricTridiagonalSolver<double>> radial_tridiagonal_solver_;
+    // Note:
+    //   - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
+    //   - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r.
+    //   - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta.
+
+    /* ------------------- */
+    /* Stencil definitions */
+    /* ------------------- */
+
+    // Stencils encode neighborhood connectivity for A_sc matrix assembly.
+    // It is only used in the construction of COO/CSR matrices.
+    // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices.
+    // The Stencil class stores the offset for each position.
+    // - Non-zero matrix indicesare obtained via `ptr + offset`
+    // - A offset value of `-1` means the position is not included in the stencil pattern.
+    // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices.
 
     // clang-format off
     const Stencil stencil_DB_ = {
         -1, -1, -1,
         -1,  0, -1,
         -1, -1, -1
     };
-    /* Circle Stencils */
-    const Stencil circle_stencil_interior_ = {
-        -1,  2, -1,
-        -1,  0, -1,
-        -1,  1, -1
-    };
     const Stencil circle_stencil_across_origin_ = {
         -1,  3, -1,
-        1,  0, -1,
+         1,  0, -1,
         -1,  2, -1
     };
-    /* Radial Stencils */
-    const Stencil radial_stencil_interior_ = {
-        -1, -1, -1,
-        1,  0,  2,
-        -1, -1, -1
-    };
-    const Stencil radial_stencil_next_outer_DB_ = {
-        -1, -1, -1,
-        1,  0, -1,
-        -1, -1, -1
-    };
-    const Stencil radial_stencil_next_circular_smoothing_ = {
-        -1, -1, -1,
-        -1,  0,  1,
-        -1, -1, -1
-    };
     // clang-format on
 
-    const Stencil& getStencil(int i_r) const;
-    int getNonZeroCountCircleAsc(const int i_r) const;
-    int getNonZeroCountRadialAsc(const int i_theta) const;
+    // Select correct stencil depending on the grid position.
+    const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */
+    // Number of nonzero A_sc entries.
+    int getNonZeroCountCircleAsc(int i_r) const; /* Only i_r = 0 implemented */
+    // Obtain a ptr to index into COO matrices.
+    // It accumulates all stencil sizes within a line up to, but excluding the current node.
+    int getCircleAscIndex(int i_r, int i_theta) const; /* Only i_r = 0 implemented */
 
-    int getCircleAscIndex(const int i_r, const int i_theta) const;
-    int getRadialAscIndex(const int i_r, const int i_theta) const;
+    /* --------------- */
+    /* Matrix assembly */
+    /* --------------- */
 
+    // Build all A_sc matrices for circle and radial smoothers.
     void buildAscMatrices();
-    void buildAscCircleSection(const int i_r);
-    void buildAscRadialSection(const int i_theta);
+    // Build A_sc matrix block for a single circular line.
+    void buildAscCircleSection(int i_r);
+    // Build A_sc matrix block for a single radial line.
+    void buildAscRadialSection(int i_theta);
+    // Build A_sc for a specific node (i_r, i_theta)
+    void nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                          MatrixType& inner_boundary_circle_matrix,
+                          BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
+                          BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, ConstVector<double>& arr,
+                          ConstVector<double>& att, ConstVector<double>& art, ConstVector<double>& detDF,
+                          ConstVector<double>& coeff_beta);
+
+    /* ---------------------- */
+    /* Orthogonal application */
+    /* ---------------------- */
 
-    void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector<double> x,
-                                    ConstVector<double> rhs, Vector<double> temp);
-    void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector<double> x,
-                                    ConstVector<double> rhs, Vector<double> temp);
+    // Compute temp = f_sc − A_sc^ortho * u_sc^ortho   (precomputed right-hand side)
+    // where x = u_sc and rhs = f_sc
+    void applyAscOrthoCircleSection(int i_r, ConstVector<double> x, ConstVector<double> rhs, Vector<double> temp);
+    void applyAscOrthoRadialSection(int i_theta, ConstVector<double> x, ConstVector<double> rhs, Vector<double> temp);
 
-    void solveCircleSection(const int i_r, Vector<double> x, Vector<double> temp, Vector<double> solver_storage_1,
-                            Vector<double> solver_storage_2);
-    void solveRadialSection(const int i_theta, Vector<double> x, Vector<double> temp, Vector<double> solver_storage);
+    /* ----------------- */
+    /* Line-wise solvers */
+    /* ----------------- */
 
+    // Solve the linear system:
+    //     A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho
+    // Parameter mapping:
+    //   x    = u_sc   (solution vector for section s and color c)
+    //   temp = f_sc − A_sc^ortho * u_sc^ortho   (precomputed right-hand side)
+    // where:
+    //   s in {Circle, Radial}  denotes the smoother section type,
+    //   c in {Black, White}    denotes the line coloring.
+    void solveBlackCircleSection(Vector<double> x, Vector<double> temp);
+    void solveWhiteCircleSection(Vector<double> x, Vector<double> temp);
+    void solveBlackRadialSection(Vector<double> x, Vector<double> temp);
+    void solveWhiteRadialSection(Vector<double> x, Vector<double> temp);
+
+    /* ----------------------------------- */
+    /* Initialize and destroy MUMPS solver */
+    /* ----------------------------------- */
 #ifdef GMGPOLAR_USE_MUMPS
+    // Initialize sparse MUMPS solver with assembled COO matrix.
     void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO<double>& solver_matrix);
+    // Release MUMPS internal memory and MPI structures.
     void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver);
 #endif
-
-    void nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
-                               MatrixType& inner_boundary_circle_matrix,
-                               std::vector<SymmetricTridiagonalSolver<double>>& circle_tridiagonal_solver,
-                               std::vector<SymmetricTridiagonalSolver<double>>& radial_tridiagonal_solver,
-                               ConstVector<double>& arr, ConstVector<double>& att, ConstVector<double>& art,
-                               ConstVector<double>& detDF, ConstVector<double>& coeff_beta);
 };
diff --git a/scripts/tutorial/run.sh b/scripts/tutorial/run.sh
@@ -115,6 +115,8 @@ else
     exit 1
 fi
 
+export OMP_NUM_THREADS=$maxOpenMPThreads
+
 "$GMGPOLAR_EXEC" \
     --verbose $verbose \
     --paraview $paraview \

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -124,11 +124,12 @@ set(SMOOTHER_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherSolver.cpp
 
     # SmootherTake
+    ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/applyAscOrtho.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildMatrix.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/initializeMumps.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/matrixStencil.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherSolver.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherTake.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/solveAscSystem.cpp
 )
 
 # file(GLOB_RECURSE EXTRAPOLATED_SMOOTHER_SOURCES