Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row, int column,
double value)
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
4 changes: 2 additions & 2 deletions include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_take

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row, int column,
double value)
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ namespace extrapolated_smoother_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace extrapolated_smoother_take

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
40 changes: 35 additions & 5 deletions include/LinearAlgebra/Solvers/tridiagonal_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ class BatchedTridiagonalSolver
{
return main_diagonal_(batch_idx * matrix_dimension_ + index);
}
KOKKOS_INLINE_FUNCTION
void set_main_diagonal(const int batch_idx, const int index, const T value) const
{
main_diagonal_(batch_idx * matrix_dimension_ + index) = value;
}
KOKKOS_INLINE_FUNCTION
void add_main_diagonal(const int batch_idx, const int index, const T value) const
{
main_diagonal_(batch_idx * matrix_dimension_ + index) += value;
}

KOKKOS_INLINE_FUNCTION
const T& sub_diagonal(const int batch_idx, const int index) const
Expand All @@ -65,18 +75,37 @@ class BatchedTridiagonalSolver
{
return sub_diagonal_(batch_idx * matrix_dimension_ + index);
}
KOKKOS_INLINE_FUNCTION
void set_sub_diagonal(const int batch_idx, const int index, const T value) const
{
sub_diagonal_(batch_idx * matrix_dimension_ + index) = value;
}
KOKKOS_INLINE_FUNCTION
void add_sub_diagonal(const int batch_idx, const int index, const T value) const
{
sub_diagonal_(batch_idx * matrix_dimension_ + index) += value;
}

KOKKOS_INLINE_FUNCTION
const T& cyclic_corner(const int batch_idx) const
{
return sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1));
}

KOKKOS_INLINE_FUNCTION
T& cyclic_corner(const int batch_idx)
{
return sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1));
}
KOKKOS_INLINE_FUNCTION
void set_cyclic_corner(const int batch_idx, const T value) const
{
sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)) = value;
}
KOKKOS_INLINE_FUNCTION
void add_cyclic_corner(const int batch_idx, const T value) const
{
sub_diagonal_(batch_idx * matrix_dimension_ + (matrix_dimension_ - 1)) += value;
}

/* ---------------------------------------------- */
/* Setup: Cholesky Decomposition: A = L * D * L^T */
Expand Down Expand Up @@ -233,6 +262,7 @@ class BatchedTridiagonalSolver
rhs(offset + 0) + cyclic_corner_element / gamma(batch_idx) * rhs(offset + matrix_dimension - 1);
const T dot_product_u_v = buffer(offset + 0) + cyclic_corner_element / gamma(batch_idx) *
buffer(offset + matrix_dimension - 1);

const T factor = dot_product_x_v / (1.0 + dot_product_u_v);

for (int i = 0; i < matrix_dimension; i++) {
Expand Down Expand Up @@ -305,10 +335,10 @@ class BatchedTridiagonalSolver
int matrix_dimension_;
int batch_count_;

Vector<T> main_diagonal_;
Vector<T> sub_diagonal_;
Vector<T> buffer_;
Vector<T> gamma_;
AllocatableVector<T> main_diagonal_;
AllocatableVector<T> sub_diagonal_;
AllocatableVector<T> buffer_;
AllocatableVector<T> gamma_;

bool is_cyclic_;
bool is_factorized_;
Expand Down
4 changes: 2 additions & 2 deletions include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace smoother_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
56 changes: 35 additions & 21 deletions include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,35 @@

namespace smoother_take
{
#include "matrixStencil.inl"

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(const SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr,
int offset, int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
matrix.set_value(ptr + offset , value);
matrix.set_value(ptr + offset, value);
}
#else
// When using the in-house solver, the matrix is stored in CSR format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
static inline void update_CSR_COO_MatrixElement(const SparseMatrixCSR<double, Kokkos::HostSpace>& matrix, int ptr,
int offset, int row, int column, double value)
{
matrix.set_row_nz_index(row, offset, column);
matrix.set_row_nz_entry(row, offset, value);
}
#endif

} // namespace smoother_take

template <class LevelCacheType>
void SmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix(
int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, ConstVector<double>& arr,
ConstVector<double>& att, ConstVector<double>& art, ConstVector<double>& detDF, ConstVector<double>& coeff_beta)
template <class MatrixType>
static inline void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior,
const MatrixType& matrix, ConstVector<double>& arr,
ConstVector<double>& att, ConstVector<double>& art,
ConstVector<double>& detDF, ConstVector<double>& coeff_beta)
{
using smoother_take::getCircleAscIndex;
using smoother_take::getStencil;
using smoother_take::update_CSR_COO_MatrixElement;

assert(i_theta >= 0 && i_theta < grid.ntheta());
Expand All @@ -47,13 +49,13 @@ void SmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix(
/* ------------------------------------------------ */
if (DirBC_Interior) {
const int center_index = i_theta;
const int center_nz_index = getCircleAscIndex(i_r, i_theta);
const int center_nz_index = getCircleAscIndex(i_r, i_theta, DirBC_Interior);

/* Fill matrix row of (i,j) */
row = center_index;
ptr = center_nz_index;

const Stencil& CenterStencil = getStencil(i_r);
const Stencil& CenterStencil = getStencil(i_r, DirBC_Interior);

offset = CenterStencil[StencilPosition::Center];
column = center_index;
Expand Down Expand Up @@ -92,7 +94,7 @@ void SmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix(
const int bottom_index = i_theta_M1;
const int top_index = i_theta_P1;

const int center_nz_index = getCircleAscIndex(i_r, i_theta);
const int center_nz_index = getCircleAscIndex(i_r, i_theta, DirBC_Interior);

const double left_value = -coeff1 * (arr[center] + arr[left]);
const double right_value = -coeff2 * (arr[center] + arr[right]);
Expand All @@ -106,7 +108,7 @@ void SmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix(
row = center_index;
ptr = center_nz_index;

const Stencil& CenterStencil = getStencil(i_r);
const Stencil& CenterStencil = getStencil(i_r, DirBC_Interior);

offset = CenterStencil[StencilPosition::Center];
column = center_index;
Expand All @@ -130,10 +132,14 @@ void SmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix(
}
}

} // namespace smoother_take

template <class LevelCacheType>
typename SmootherTake<LevelCacheType>::InnerBoundaryMatrix
SmootherTake<LevelCacheType>::buildInteriorBoundarySolverMatrix()
{
using smoother_take::nodeBuildInteriorBoundarySolverMatrix;

const PolarGrid& grid = Smoother<LevelCacheType>::grid_;
const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
const bool DirBC_Interior = Smoother<LevelCacheType>::DirBC_Interior_;
Expand All @@ -149,7 +155,8 @@ SmootherTake<LevelCacheType>::buildInteriorBoundarySolverMatrix()
// the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix,
// which is extracted by the COO_Mumps_Solver internally.
#ifdef GMGPOLAR_USE_MUMPS
const int nnz = getNonZeroCountCircleAsc(i_r);
using smoother_take::getNonZeroCountCircleAsc;
const int nnz = getNonZeroCountCircleAsc(i_r, grid, DirBC_Interior);
SparseMatrixCOO<double, Kokkos::HostSpace> inner_boundary_solver_matrix(ntheta, ntheta, nnz);
inner_boundary_solver_matrix.is_symmetric(true);
#else
Expand All @@ -169,11 +176,18 @@ SmootherTake<LevelCacheType>::buildInteriorBoundarySolverMatrix()
ConstVector<double> detDF = level_cache.detDF();
ConstVector<double> coeff_beta = level_cache.coeff_beta();

#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 0; i_theta < ntheta; i_theta++) {
nodeBuildInteriorBoundarySolverMatrix(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, arr, att,
art, detDF, coeff_beta);
}
const PolarGrid* grid_ptr = &grid;

// Capture pointer to matrix for use in Kokkos parallel loop.
const InnerBoundaryMatrix* matrix_ptr = &inner_boundary_solver_matrix;

Kokkos::parallel_for(
"Smoother Take: BuildInnerBoundaryAsc", Kokkos::RangePolicy<>(0, ntheta), KOKKOS_LAMBDA(const int i_theta) {
nodeBuildInteriorBoundarySolverMatrix(i_theta, *grid_ptr, DirBC_Interior, *matrix_ptr, arr, att, art, detDF,
coeff_beta);
});

Kokkos::fence();

return inner_boundary_solver_matrix;
}
80 changes: 50 additions & 30 deletions include/Smoother/SmootherTake/buildTridiagonalAsc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,24 @@
namespace smoother_take
{

static inline void updateMatrixElement(BatchedTridiagonalSolver<double>& solver, int batch, int row, int column,
static inline void updateMatrixElement(const BatchedTridiagonalSolver<double>& solver, int batch, int row, int column,
double value)
{
if (row == column)
solver.main_diagonal(batch, row) = value;
solver.set_main_diagonal(batch, row, value);
else if (row == column - 1)
solver.sub_diagonal(batch, row) = value;
solver.set_sub_diagonal(batch, row, value);
else if (row == 0 && column == solver.matrixDimension() - 1)
solver.cyclic_corner(batch) = value;
solver.set_cyclic_corner(batch, value);
}

} // namespace smoother_take

template <class LevelCacheType>
void SmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrices(
int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, ConstVector<double>& arr, ConstVector<double>& att,
ConstVector<double>& art, ConstVector<double>& detDF, ConstVector<double>& coeff_beta)
// Build the tridiagonal solver matrices for a specific node (i_r, i_theta)
static inline void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
const BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
const BatchedTridiagonalSolver<double>& radial_tridiagonal_solver,
ConstVector<double>& arr, ConstVector<double>& att,
ConstVector<double>& art, ConstVector<double>& detDF,
ConstVector<double>& coeff_beta)
{
using smoother_take::updateMatrixElement;

Expand Down Expand Up @@ -302,14 +301,21 @@ void SmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrices(
}
}

} // namespace smoother_take

template <class LevelCacheType>
void SmootherTake<LevelCacheType>::buildTridiagonalSolverMatrices()
{
using smoother_take::nodeBuildTridiagonalSolverMatrices;

const PolarGrid& grid = Smoother<LevelCacheType>::grid_;
const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
const bool DirBC_Interior = Smoother<LevelCacheType>::DirBC_Interior_;
const int num_omp_threads = Smoother<LevelCacheType>::num_omp_threads_;

const BatchedTridiagonalSolver<double>* circle_tridiagonal_solver_ptr = &circle_tridiagonal_solver_;
const BatchedTridiagonalSolver<double>* radial_tridiagonal_solver_ptr = &radial_tridiagonal_solver_;

assert(level_cache.cacheDensityProfileCoefficients());
assert(level_cache.cacheDomainGeometry());

Expand All @@ -319,22 +325,36 @@ void SmootherTake<LevelCacheType>::buildTridiagonalSolverMatrices()
ConstVector<double> detDF = level_cache.detDF();
ConstVector<double> coeff_beta = level_cache.coeff_beta();

#pragma omp parallel num_threads(num_omp_threads)
{
#pragma omp for nowait
for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
}
}

#pragma omp for nowait
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
}
}
}
const PolarGrid* grid_ptr = &grid;

/* We split the loops into two regions to better respect the */
/* access patterns of the smoother and improve cache locality. */

// The For loop matches circular access pattern */
Kokkos::parallel_for(
"Smoother Take: BuildTridiagonalAsc (Circular)",
Kokkos::MDRangePolicy<Kokkos::Rank<2>>( // Rank of the index space
{0, 0}, // Starting point of the index space
{grid.numberSmootherCircles(), grid.ntheta()} // Ending point of the index space
),
// Kokkos lambda function to execute for each point in the index space
KOKKOS_LAMBDA(const int i_r, const int i_theta) {
nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver_,
radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
});

/* For loop matches radial access pattern */
Kokkos::parallel_for(
"Smoother Take: BuildTridiagonalAsc (Radial)",
Kokkos::MDRangePolicy<Kokkos::Rank<2>>( // Rank of the index space
{0, grid.numberSmootherCircles()}, // Starting point of the index space
{grid.ntheta(), grid.nr()} // Ending point of the index space
),
// Kokkos lambda function to execute for each point in the index space
KOKKOS_LAMBDA(const int i_theta, const int i_r) {
nodeBuildTridiagonalSolverMatrices(i_r, i_theta, *grid_ptr, DirBC_Interior, circle_tridiagonal_solver_,
radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
});

Kokkos::fence();
}
Loading
Loading