diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl index 3a845970..7371c2c5 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl @@ -3,10 +3,11 @@ namespace extrapolated_smoother_give { -static inline void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, - ConstVector& rhs, Vector& result, double arr, double att, - double art, double detDF, double coeff_beta) +static KOKKOS_INLINE_FUNCTION void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const PolarGrid& grid, + bool DirBC_Interior, SmootherColor smoother_color, + ConstVector& x, ConstVector& rhs, + Vector& result, double arr, double att, + double art, double detDF, double coeff_beta) { assert(i_r >= 0 && i_r <= grid.numberSmootherCircles()); @@ -400,10 +401,11 @@ static inline void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const Polar namespace extrapolated_smoother_give { -static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, - ConstVector& rhs, Vector& result, double arr, double att, - double art, double detDF, double coeff_beta) +static KOKKOS_INLINE_FUNCTION void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const PolarGrid& grid, + bool DirBC_Interior, SmootherColor smoother_color, + ConstVector& x, ConstVector& rhs, + Vector& result, double arr, double att, + double art, double detDF, double coeff_beta) { assert(i_r >= grid.numberSmootherCircles() - 1 && i_r < grid.nr()); @@ -946,4 +948,4 @@ void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(int i_ nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, detDF, coeff_beta); } -} \ No newline at end of file +} diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl index 0d8ac1da..32b4f420 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl @@ -3,11 +3,12 @@ namespace extrapolated_smoother_take { -static inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - ConstVector& x, ConstVector& rhs, Vector& result, - ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta) +static KOKKOS_INLINE_FUNCTION void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const PolarGrid& grid, + bool DirBC_Interior, ConstVector& x, + ConstVector& rhs, Vector& result, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta) { assert(i_r >= 0 && i_r <= grid.numberSmootherCircles()); @@ -185,11 +186,11 @@ static inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const Polar } } -static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - ConstVector& x, ConstVector& rhs, Vector& result, - ConstVector& arr, const ConstVector& att, - ConstVector& art, const ConstVector& detDF, - ConstVector& coeff_beta) +static KOKKOS_INLINE_FUNCTION void +nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, ConstVector& x, + ConstVector& rhs, Vector& result, ConstVector& arr, + const ConstVector& att, ConstVector& art, const ConstVector& detDF, + ConstVector& coeff_beta) { assert(i_r >= grid.numberSmootherCircles() - 1 && i_r < grid.nr()); @@ -483,14 +484,22 @@ void ExtrapolatedSmootherTake::applyAscOrthoBlackCircleSection(C /* The outer most circle next to the radial section is defined to be black. */ const int start_black_circles = (grid.numberSmootherCircles() % 2 == 0) ? 1 : 0; - -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_black_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + const int num_black_circles = (grid.numberSmootherCircles() - start_black_circles + 1) / 2; + + Kokkos::parallel_for( + "ExtrapolatedSmootherTake: ApplyAscOrtho (Black Circular)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, 0}, // Starting point of the index space + {num_black_circles, grid.ntheta()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int circle_task, const int i_theta) { + int i_r = start_black_circles + circle_task * 2; nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, coeff_beta); - } - } + }); + + Kokkos::fence(); } template @@ -516,14 +525,22 @@ void ExtrapolatedSmootherTake::applyAscOrthoWhiteCircleSection(C /* The outer most circle next to the radial section is defined to be black. */ const int start_white_circles = (grid.numberSmootherCircles() % 2 == 0) ? 0 : 1; - -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_r = start_white_circles; i_r < grid.numberSmootherCircles(); i_r += 2) { - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + const int num_white_circles = (grid.numberSmootherCircles() - start_white_circles + 1) / 2; + + Kokkos::parallel_for( + "ExtrapolatedSmootherTake: ApplyAscOrtho (White Circular)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, 0}, // Starting point of the index space + {num_white_circles, grid.ntheta()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int circle_task, const int i_theta) { + const int i_r = start_white_circles + circle_task * 2; nodeApplyAscOrthoCircleTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, coeff_beta); - } - } + }); + + Kokkos::fence(); } template @@ -547,13 +564,24 @@ void ExtrapolatedSmootherTake::applyAscOrthoBlackRadialSection(C ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta += 2) { - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + assert(grid.ntheta() % 2 == 0); + const int start_black_radials = 0; + const int num_black_radial_lines = grid.ntheta() / 2; + + Kokkos::parallel_for( + "ExtrapolatedSmootherTake: ApplyAscOrtho (Black Radial)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, grid.numberSmootherCircles()}, // Starting point of the index space + {num_black_radial_lines, grid.nr()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int radial_task, const int i_r) { + const int i_theta = start_black_radials + radial_task * 2; nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, coeff_beta); - } - } + }); + + Kokkos::fence(); } template @@ -577,11 +605,22 @@ void ExtrapolatedSmootherTake::applyAscOrthoWhiteRadialSection(C ConstVector detDF = level_cache.detDF(); ConstVector coeff_beta = level_cache.coeff_beta(); -#pragma omp parallel for num_threads(num_omp_threads) - for (int i_theta = 1; i_theta < grid.ntheta(); i_theta += 2) { - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + assert(grid.ntheta() % 2 == 0); + const int start_white_radials = 1; + const int num_white_radial_lines = grid.ntheta() / 2; + + Kokkos::parallel_for( + "ExtrapolatedSmootherTake: ApplyAscOrtho (White Radial)", + Kokkos::MDRangePolicy>( // Rank of the index space + {0, grid.numberSmootherCircles()}, // Starting point of the index space + {num_white_radial_lines, grid.nr()} // Ending point of the index space + ), + // Kokkos lambda function to execute for each point in the index space + KOKKOS_LAMBDA(const int radial_task, const int i_r) { + const int i_theta = start_white_radials + radial_task * 2; nodeApplyAscOrthoRadialTake(i_r, i_theta, grid, DirBC_Interior, x, rhs, temp, arr, att, art, detDF, coeff_beta); - } - } -} \ No newline at end of file + }); + + Kokkos::fence(); +} diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h index 841d8203..c14dc32a 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h @@ -172,6 +172,8 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother /* Orthogonal application */ /* ---------------------- */ + // Functions must be public due to cuda restriction +public: // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); @@ -183,8 +185,6 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother /* Line-wise solvers */ /* ----------------- */ - // Functions must be public due to cuda restriction -public: // Solve the linear system: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // Parameter mapping: diff --git a/include/Smoother/SmootherGive/applyAscOrtho.inl b/include/Smoother/SmootherGive/applyAscOrtho.inl index f91f5366..c197e188 100644 --- a/include/Smoother/SmootherGive/applyAscOrtho.inl +++ b/include/Smoother/SmootherGive/applyAscOrtho.inl @@ -3,10 +3,11 @@ namespace smoother_give { -static inline void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, - ConstVector& rhs, Vector& result, double arr, double att, - double art, double detDF, double coeff_beta) +static KOKKOS_INLINE_FUNCTION void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const PolarGrid& grid, + bool DirBC_Interior, SmootherColor smoother_color, + ConstVector& x, ConstVector& rhs, + Vector& result, double arr, double att, + double art, double detDF, double coeff_beta) { assert(i_r >= 0 && i_r <= grid.numberSmootherCircles()); @@ -187,10 +188,11 @@ static inline void nodeApplyAscOrthoCircleGive(int i_r, int i_theta, const Polar } } -static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, - ConstVector& rhs, Vector& result, double arr, double att, - double art, double detDF, double coeff_beta) +static KOKKOS_INLINE_FUNCTION void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const PolarGrid& grid, + bool DirBC_Interior, SmootherColor smoother_color, + ConstVector& x, ConstVector& rhs, + Vector& result, double arr, double att, + double art, double detDF, double coeff_beta) { assert(i_r >= grid.numberSmootherCircles() - 1 && i_r < grid.nr()); @@ -479,4 +481,4 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, detDF, coeff_beta); } -} \ No newline at end of file +}