Skip to content

小彭老师大大,有关mpm的流体模拟函数优化 #10

@dd123-a

Description

@dd123-a

void Simulate() {

// CLEAR GRID
std::size_t grid_size = grid.size();

// 确保 grid_size 不超出 int 的范围

#pragma omp parallel for
for (int i = 0; i < static_cast(grid_size); ++i) {
grid[i].vel = glm::vec3(0.0f);
grid[i].mass = 0.0f;
}

// P2G_1
//质量与动量转移
 #pragma omp parallel for
for (int i = 0; i < particles.size(); i++) {
    auto &p = particles[i];
    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];
    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);
                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;
                glm::vec3 Q = p.C * cell_dist;

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                float mass_contrib = weight * particle_mass;
                grid[cell_index].mass += mass_contrib;
                grid[cell_index].vel += mass_contrib * (p.vel + Q);
            }

        }
    }
}

// P2G_2
//密度计算和压力计算
 #pragma omp parallel for
for (int i = 0; i < particles.size(); i++) {
    auto &p = particles[i];

    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];
    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    float density = 0.0f;
    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                density += grid[cell_index].mass * weight;
            }
        }
    }

    float volume = particle_mass / density;
    float pressure = std::max(-0.1f, eos_stiffness *
                                     (std::pow(density / rest_density, eos_power) - 1.0f));

    glm::mat3 stress = glm::mat3(
            -pressure, 0, 0,
            0, -pressure, 0,
            0, 0, -pressure
    );

     glm::mat3 strain = p.C;

     //float trace = strain[0][0] + strain[1][0] + strain[2][0]; // DEBUG
     float trace = glm::determinant(strain);
     strain[0][0] = strain[1][0] = strain[2][0] = trace;

     glm::mat3 viscosity_term = dynamic_viscosity * strain;
     stress += viscosity_term;

    auto eq_16_term_0 = -volume * 4 * stress * dt;

    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);

                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                glm::vec3 momentum = (eq_16_term_0 * weight) * cell_dist;
                grid[cell_index].vel += momentum;
            }
        }
    }
}

// GRID UPDATE
// 获取 grid 的大小
grid_size = grid.size();

// GRID UPDATE

#pragma omp parallel for
for (int i = 0; i < static_cast(grid_size); ++i) {
auto& cell = grid[static_caststd::size_t(i)]; // 使用 static_cast 将索引转换为 std::size_t
if (cell.mass > 0) {
cell.vel /= cell.mass;
cell.vel += dt * glm::vec3(0.0f, gravity, 0.0f);

        int index = cell.index;
        int x = index / (grid_res * grid_res);
        index /= grid_res;
        int y = (index / grid_res) % grid_res;
        index /= grid_res;
        int z = index;

        if (x < 1 || x > grid_res - 2) { cell.vel.x = 0.0f; }
        if (y < 1 || y > grid_res - 2) { cell.vel.y = 0.0f; }
        if (z < 1 || z > grid_res - 2) { cell.vel.z = 0.0f; }
    }
}


// G2P
 #pragma omp parallel for
for (int i = 0; i < static_cast<int>(particles.size()); ++i) {
    auto& p = particles[static_cast<std::size_t>(i)]; // 使用索引访问粒子

    p.vel = glm::vec3(0.0f);

    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];

    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    glm::mat3 B = glm::mat3(0.0f);
    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;
                // std::cout << weight << std::endl;
                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                    cell_idx.y + gy - 1,
                    cell_idx.z + gz - 1);

                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                int cell_index = (int)cell_pos.x +
                    (int)cell_pos.y * grid_res +
                    (int)cell_pos.z * grid_res * grid_res;

                glm::vec3 weighted_velocity = grid[cell_index].vel * weight;

                B += glm::mat3(weighted_velocity * cell_dist.x,
                    weighted_velocity * cell_dist.y,
                    weighted_velocity * cell_dist.z);

                p.vel += weighted_velocity;
            }
        }
    }

    p.C = B * 4.0f;
    p.vel *= damping;
    p.pos += p.vel * dt;
    p.pos = glm::clamp(p.pos, 1.0f, grid_res - 2.0f);

    glm::vec3 x_n = p.pos + p.vel;
    const float wall_min = 3.0f;
    const float wall_max = grid_res - 4.0f;
    if (x_n.x < wall_min) p.vel.x += (wall_min - x_n.x);
    if (x_n.x > wall_max) p.vel.x += (wall_max - x_n.x);
    if (x_n.y < wall_min) p.vel.y += (wall_min - x_n.y);
    if (x_n.y > wall_max) p.vel.y += (wall_max - x_n.y);
    if (x_n.z < wall_min) p.vel.z += (wall_min - x_n.z);
    if (x_n.z > wall_max) p.vel.z += (wall_max - x_n.z);
}

}这是使用openmp的进行了加速,但是我有在youtube上看到可以对其用simd指令优化到单线程的也能流畅运行的程度,但本人对此一窍不通(悲),所以来探讨一下是否有可行性。(本人尝试过使用simd进行改写,但是debug的太痛苦了,模板元什么的最讨厌了)https://github.com/Hab5/mls-mpm这是对应的完整代码仓库,https://www.youtube.com/watch?v=4Y58Pg9tpSo这是对应的YouTube视频介绍(梦开始的地方)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions