Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 111 additions & 171 deletions include/Residual/ResidualGive/applyAGive.inl

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions include/Residual/ResidualGive/residualGive.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,11 @@ class ResidualGive : public Residual<LevelCacheType>
const int num_omp_threads);
~ResidualGive() override = default;

void computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const final;

void applySystemOperator(Vector<double> result, ConstVector<double> x) const final;

private:
void applyCircleSection(const int i_r, Vector<double> result, ConstVector<double> x) const;
void applyRadialSection(const int i_theta, Vector<double> result, ConstVector<double> x) const;
void computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const final;
};

#include "residualGive.inl"
#include "applyAGive.inl"

} // namespace gmgpolar
114 changes: 8 additions & 106 deletions include/Residual/ResidualGive/residualGive.inl
Original file line number Diff line number Diff line change
Expand Up @@ -7,105 +7,6 @@ ResidualGive<LevelCacheType>::ResidualGive(const PolarGrid& grid, const LevelCac
{
}

/* ------------ */
/* result = A*x */

// clang-format off
template <class LevelCacheType>
void ResidualGive<LevelCacheType>::applySystemOperator(Vector<double> result, ConstVector<double> x) const
{
assert(result.size() == x.size());

assign(result, 0.0);

const PolarGrid& grid = Residual<LevelCacheType>::grid_;

const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;

/* Single-threaded execution */
if (num_omp_threads == 1) {
for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
applyCircleSection(i_r, result, x);
}
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
applyRadialSection(i_theta, result, x);
}
}
/* Multi-threaded execution */
else {
const int num_circle_tasks = grid.numberSmootherCircles();
const int additional_radial_tasks = grid.ntheta() % 3;
const int num_radial_tasks = grid.ntheta() - additional_radial_tasks;

#pragma omp parallel num_threads(num_omp_threads)
{
/* Circle Section 0 */
#pragma omp for
for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
applyCircleSection(i_r, result, x);
}
/* Circle Section 1 */
#pragma omp for
for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
applyCircleSection(i_r, result, x);
}
/* Circle Section 2 */
#pragma omp for nowait
for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
applyCircleSection(i_r, result, x);
}

/* Radial Section 0 */
#pragma omp for
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 0) {
int i_theta = radial_task + additional_radial_tasks;
applyRadialSection(i_theta, result, x);
}
else {
if (additional_radial_tasks == 0) {
applyRadialSection(0, result, x);
}
else if (additional_radial_tasks >= 1) {
applyRadialSection(0, result, x);
applyRadialSection(1, result, x);
}
}
}
/* Radial Section 1 */
#pragma omp for
for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 1) {
int i_theta = radial_task + additional_radial_tasks;
applyRadialSection(i_theta, result, x);
}
else {
if (additional_radial_tasks == 0) {
applyRadialSection(1, result, x);
}
else if (additional_radial_tasks == 1) {
applyRadialSection(2, result, x);
}
else if (additional_radial_tasks == 2) {
applyRadialSection(2, result, x);
applyRadialSection(3, result, x);
}
}
}
/* Radial Section 2 */
#pragma omp for
for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
int i_theta = radial_task + additional_radial_tasks;
applyRadialSection(i_theta, result, x);
}
}
}
}
// clang-format on

/* ------------------ */
/* result = rhs - A*x */
template <class LevelCacheType>
Expand All @@ -117,10 +18,11 @@ void ResidualGive<LevelCacheType>::computeResidual(Vector<double> result, ConstV
applySystemOperator(result, x);

// Subtract A*x from rhs to get the residual.
const int n = result.size();
const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;
#pragma omp parallel for num_threads(num_omp_threads)
for (int i = 0; i < n; i++) {
result[i] = rhs[i] - result[i];
}
}
const int n = result.size();

Kokkos::parallel_for(
"Residual Give: Subtract A*x from rhs", Kokkos::RangePolicy<>(0, n),
KOKKOS_LAMBDA(const int i) { result[i] = rhs[i] - result[i]; });

Kokkos::fence();
}
9 changes: 4 additions & 5 deletions include/Residual/ResidualTake/applyATake.inl
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ static KOKKOS_INLINE_FUNCTION void node_apply_a_take(const int i_r, const int i_
const double coeff4 = 0.5 * (h1 + h2) / k2;
const double coeff5 = 0.25 * (h1 + h2) * (k1 + k2);

const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
const int i_theta_Across = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2);

// Across origin: (i_r - 1, i_theta) gets replaced with (i_r, i_theta + (grid.ntheta() / 2)).
const int left =
(i_r > 0) ? grid.index(i_r - 1, i_theta) : grid.index(i_r, grid.wrapThetaIndex(i_theta + grid.ntheta() / 2));
const int left = (i_r == 0) ? grid.index(i_r, i_theta_Across) : grid.index(i_r - 1, i_theta);
const int right = grid.index(i_r + 1, i_theta);
const int bottom = grid.index(i_r, i_theta_M1);
const int top = grid.index(i_r, i_theta_P1);
Expand Down
6 changes: 3 additions & 3 deletions include/Residual/ResidualTake/residualTake.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ class ResidualTake : public Residual<LevelCacheType>
const int num_omp_threads);
~ResidualTake() override = default;

void computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const override;

void applySystemOperator(Vector<double> result, ConstVector<double> x) const override;
void applySystemOperator(Vector<double> result, ConstVector<double> x) const final;
void computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const final;
};

#include "residualTake.inl"
#include "applyATake.inl"

} // namespace gmgpolar
12 changes: 6 additions & 6 deletions include/Residual/ResidualTake/residualTake.inl
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ void ResidualTake<LevelCacheType>::computeResidual(Vector<double> result, ConstV
{
assert(result.size() == x.size());

const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;

applySystemOperator(result, x);

// Subtract A*x from rhs to get the residual.
const int n = result.size();
#pragma omp parallel for num_threads(num_omp_threads)
for (int i = 0; i < n; i++) {
result[i] = rhs[i] - result[i];
}

Kokkos::parallel_for(
"Residual Take: Subtract A*x from rhs", Kokkos::RangePolicy<>(0, n),
KOKKOS_LAMBDA(const int i) { result[i] = rhs[i] - result[i]; });

Kokkos::fence();
}
3 changes: 1 addition & 2 deletions include/Residual/residual.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ class Residual
}
virtual ~Residual() = default;

virtual void applySystemOperator(Vector<double> result, ConstVector<double> x) const = 0;
virtual void computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const = 0;

virtual void applySystemOperator(Vector<double> result, ConstVector<double> x) const = 0;

protected:
/* ------------------- */
/* Constructor members */
Expand Down
Loading