Skip to content
Merged
4 changes: 2 additions & 2 deletions include/DirectSolver/DirectSolverGive/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row, int column,
double value)
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
4 changes: 2 additions & 2 deletions include/DirectSolver/DirectSolverTake/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace direct_solver_coo_mumps_take

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row, int column,
double value)
static inline void updateMatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ namespace extrapolated_smoother_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
/* Line-wise solvers */
/* ----------------- */

// Functions must be public due to cuda restriction
public:
// Solve the linear system:
// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho
// Parameter mapping:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveBlackCircleSection(Vector<do
}

// Move updated values to x
int start_black_circles = is_inner_circle_black ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_black_circles = is_inner_circle_black ? 0 : 1;
const int num_black_circles = (grid.numberSmootherCircles() - start_black_circles + 1) / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherGive: moveUpdatedValues (Black Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_black_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_black_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand Down Expand Up @@ -63,13 +68,18 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveWhiteCircleSection(Vector<do
}

// Move updated values to x
int start_white_circles = is_inner_circle_white ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_white_circles = is_inner_circle_white ? 0 : 1;
const int num_white_circles = (grid.numberSmootherCircles() - start_white_circles + 1) / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherGive: moveUpdatedValues (White Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_white_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_white_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -86,13 +96,20 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveBlackRadialSection(Vector<do
int batch_stride = 2;
radial_tridiagonal_solver_.solve_diagonal(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_black_radials = 0;
const int num_black_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherGive: moveUpdatedValues (Black Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_black_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_black_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -109,11 +126,18 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveWhiteRadialSection(Vector<do
int batch_stride = 2;
radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_white_radials = 1;
const int num_white_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherGive: moveUpdatedValues (White Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_white_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_white_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace extrapolated_smoother_take

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother<LevelCacheType>
/* Line-wise solvers */
/* ----------------- */

// Functions must be public due to cuda restriction
public:
// Solve the linear system:
// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho
// Parameter mapping:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@ void ExtrapolatedSmootherTake<LevelCacheType>::solveBlackCircleSection(Vector<do
}

// Move updated values to x
int start_black_circles = is_inner_circle_black ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_black_circles = is_inner_circle_black ? 0 : 1;
const int num_black_circles = (grid.numberSmootherCircles() - start_black_circles + 1) / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherTake: moveUpdatedValues (Black Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_black_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_black_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand Down Expand Up @@ -63,13 +68,18 @@ void ExtrapolatedSmootherTake<LevelCacheType>::solveWhiteCircleSection(Vector<do
}

// Move updated values to x
int start_white_circles = is_inner_circle_white ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_white_circles = is_inner_circle_white ? 0 : 1;
const int num_white_circles = (grid.numberSmootherCircles() - start_white_circles + 1) / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherTake: moveUpdatedValues (White Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_white_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_white_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -86,13 +96,20 @@ void ExtrapolatedSmootherTake<LevelCacheType>::solveBlackRadialSection(Vector<do
int batch_stride = 2;
radial_tridiagonal_solver_.solve_diagonal(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_black_radials = 0;
const int num_black_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherTake: moveUpdatedValues (Black Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_black_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_black_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -109,11 +126,18 @@ void ExtrapolatedSmootherTake<LevelCacheType>::solveWhiteRadialSection(Vector<do
int batch_stride = 2;
radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_white_radials = 1;
const int num_white_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"ExtrapolatedSmootherTake: moveUpdatedValues (White Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_white_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_white_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}
4 changes: 2 additions & 2 deletions include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ namespace smoother_give

#ifdef GMGPOLAR_USE_MUMPS
// When using the MUMPS solver, the matrix is assembled in COO format.
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset, int row,
int column, double value)
static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double, Kokkos::HostSpace>& matrix, int ptr, int offset,
int row, int column, double value)
{
matrix.set_row_index(ptr + offset, row);
matrix.set_col_index(ptr + offset, column);
Expand Down
2 changes: 2 additions & 0 deletions include/Smoother/SmootherGive/smootherGive.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ class SmootherGive : public Smoother<LevelCacheType>
/* Line-wise solvers */
/* ----------------- */

// Functions must be public due to cuda restriction
public:
// Solve the linear system:
// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho
// Parameter mapping:
Expand Down
80 changes: 52 additions & 28 deletions include/Smoother/SmootherGive/solveAscSystem.inl
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,18 @@ void SmootherGive<LevelCacheType>::solveBlackCircleSection(Vector<double> x, Vec
}

// Move updated values to x
int start_black_circles = is_inner_circle_black ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_black_circles = is_inner_circle_black ? 0 : 1;
const int num_black_circles = (grid.numberSmootherCircles() - start_black_circles + 1) / 2;
Kokkos::parallel_for(
"SmootherGive: moveUpdatedValues (Black Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_black_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_black_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -53,13 +58,18 @@ void SmootherGive<LevelCacheType>::solveWhiteCircleSection(Vector<double> x, Vec
}

// Move updated values to x
int start_white_circles = is_inner_circle_white ? 0 : 1;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) {
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
const int start_white_circles = is_inner_circle_white ? 0 : 1;
const int num_white_circles = (grid.numberSmootherCircles() - start_white_circles + 1) / 2;
Kokkos::parallel_for(
"SmootherGive: moveUpdatedValues (White Circular)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, 0},
{num_white_circles, grid.ntheta()}),
KOKKOS_LAMBDA(const int circle_task, const int i_theta) {
const int i_r = start_white_circles + circle_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -76,13 +86,20 @@ void SmootherGive<LevelCacheType>::solveBlackRadialSection(Vector<double> x, Vec
int batch_stride = 2;
radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_black_radials = 0;
const int num_black_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"SmootherGive: moveUpdatedValues (Black Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_black_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_black_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}

template <class LevelCacheType>
Expand All @@ -99,11 +116,18 @@ void SmootherGive<LevelCacheType>::solveWhiteRadialSection(Vector<double> x, Vec
int batch_stride = 2;
radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride);

// Move updated values to x
#pragma omp parallel for num_threads(num_omp_threads)
for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) {
for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
}
}
// Move updated values to x
assert(grid.ntheta() % 2 == 0);
const int start_white_radials = 1;
const int num_white_radial_lines = grid.ntheta() / 2;
Kokkos::parallel_for(
"SmootherGive: moveUpdatedValues (White Radial)",
Kokkos::MDRangePolicy<Kokkos::DefaultHostExecutionSpace, Kokkos::Rank<2>>({0, grid.numberSmootherCircles()},
{num_white_radial_lines, grid.nr()}),
KOKKOS_LAMBDA(const int radial_task, const int i_r) {
const int i_theta = start_white_radials + radial_task * 2;
const int index = grid.index(i_r, i_theta);
x[index] = temp[index];
});
Kokkos::fence();
}
Loading
Loading