Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(APP_VERSION_STRING "${PROJECT_VERSION}")
option(OPTIMIZE_FOR_NATIVE "Enable native CPU optimizations outside Debug builds" OFF)
option(BUILD_BENCHMARKS "Build benchmarks" OFF)
option(ENABLE_IPO "Enable link-time optimization for non-Debug builds when supported" ON)
option(ENABLE_TBB "Enable Intel oneAPI TBB multithreading for physics simulation" ON)

set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down Expand Up @@ -68,6 +69,12 @@ add_dependencies(latticelab_lib wgsl_shaders)
include(embedded_translate)
add_dependencies(latticelab_lib embedded_translate)

# Optional TBB backend. When ENABLE_TBB is ON the package is mandatory:
# find_package(... REQUIRED) stops configuration if TBB cannot be found.
if(ENABLE_TBB)
find_package(TBB REQUIRED)
# PUBLIC: targets that link latticelab_lib also link TBB::tbb and inherit the
# ENABLE_TBB definition, which is the macro the `#ifdef ENABLE_TBB` code paths test.
target_link_libraries(latticelab_lib PUBLIC TBB::tbb)
target_compile_definitions(latticelab_lib PUBLIC ENABLE_TBB)
message(STATUS "TBB: enabled")
endif()
target_compile_options(latticelab_lib PRIVATE
$<$<AND:$<BOOL:${OPTIMIZE_FOR_NATIVE}>,$<NOT:$<CONFIG:Debug>>,$<CXX_COMPILER_ID:GNU,Clang,AppleClang>>:-march=native>
)
Expand All @@ -86,8 +93,6 @@ target_include_directories(latticelab_lib PUBLIC
${webgpu_cpp_SOURCE_DIR}/wgpu-native
)



if(APPLE)
find_library(COCOA_FRAMEWORK Cocoa)
find_library(METAL_FRAMEWORK Metal)
Expand All @@ -105,9 +110,7 @@ if(APPLE)
${METAL_FRAMEWORK}
${QUARTZCORE_FRAMEWORK}
)
endif ()


endif()
target_link_libraries(latticelab_lib PUBLIC
glfw
imgui ImGuiFileDialog_lib
Expand Down
41 changes: 35 additions & 6 deletions Engine/NeighborSearch/NeighborList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
#include "Engine/physics/AtomStorage.h"
#include "Engine/restrict.h"

#ifdef ENABLE_TBB
#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>
#endif

void NeighborList::setCutoff(float cutoff) {
cutoff_ = cutoff;
listRadius_ = cutoff_ + skin_;
Expand Down Expand Up @@ -69,15 +74,39 @@ void NeighborList::build(const AtomStorage& atoms, World& box) {

reserveListBuffers(atoms);

// Phase 1: each task builds the neighbor lists of its own atoms independently.
// Every atom i gets a private vector perAtom[i], so iterations never append to
// a shared container. With ENABLE_TBB the index range is processed in parallel
// with a grain size of 64 atoms; otherwise the same loop runs serially.
// NOTE(review): assumes writeAtomNeighbors only reads grid/x/y/z and writes
// solely to the vector passed as its last argument — confirm before relying on
// the parallel path.
std::vector<std::vector<uint32_t>> perAtom(atomCount);
#ifdef ENABLE_TBB
tbb::parallel_for(tbb::blocked_range<uint32_t>(0, atomCount, 64),
[&](const tbb::blocked_range<uint32_t>& r) {
for (uint32_t i = r.begin(); i != r.end(); ++i)
writeAtomNeighbors(grid, x, y, z, i, x[i], y[i], z[i], perAtom[i]);
});
#else
for (uint32_t i = 0; i < atomCount; ++i) {
writeAtomNeighbors(grid, x, y, z, i, x[i], y[i], z[i], perAtom[i]);
}
#endif

// Phase 2: compute the offsets (prefix sum over per-atom list sizes) — sequentially.
// NOTE(review): the xi/yi/zi lines and the direct write into neighbors_ below
// look like the pre-change code path shown by the diff view alongside the new
// prefix-sum line — confirm which lines belong to the final file.
offsets_[0] = 0;
for (uint32_t i = 0; i < atomCount; ++i) {
const float xi = x[i];
const float yi = y[i];
const float zi = z[i];
// write all neighbors of the atom into the array
writeAtomNeighbors(grid, x, y, z, i, xi, yi, zi, neighbors_);
offsets_[i + 1] = neighbors_.size();
offsets_[i + 1] = offsets_[i] + static_cast<uint32_t>(perAtom[i].size());
}
// offsets_[atomCount] is the total number of neighbor entries.
neighbors_.resize(offsets_[atomCount]);

// Phase 3: copy the per-atom lists into the flat array — in parallel again.
// Atom i writes the slice starting at offsets_[i], whose length equals
// perAtom[i].size() by construction of the prefix sum, so slices do not overlap.
#ifdef ENABLE_TBB
tbb::parallel_for(tbb::blocked_range<uint32_t>(0, atomCount),
[&](const tbb::blocked_range<uint32_t>& r) {
for (uint32_t i = r.begin(); i != r.end(); ++i)
std::copy(perAtom[i].begin(), perAtom[i].end(), neighbors_.begin() + offsets_[i]);
});
#else
for (uint32_t i = 0; i < atomCount; ++i) {
std::copy(perAtom[i].begin(), perAtom[i].end(), neighbors_.begin() + offsets_[i]);
}
#endif

std::copy(x, x + atoms.mobileCount(), refPosX_.data());
std::copy(y, y + atoms.mobileCount(), refPosY_.data());
Expand Down
34 changes: 25 additions & 9 deletions Engine/physics/ForceField.cpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
#include "ForceField.h"

#include "Engine/NeighborSearch/NeighborList.h"
#include "Engine/World.h"
#include "Engine/metrics/Profiler.h"
#include "Engine/physics/AtomStorage.h"

#ifdef ENABLE_TBB
#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>
#endif

namespace {
template <bool UseLJ, bool UseCoulomb>
void computePairInteractionsImpl(AtomStorage& atoms, const NeighborList& neighborList, const LJForceField& ljForceField,
const CoulombForceField& coulombForceField) {
const auto& offsets = neighborList.offsets();
const auto& offsets = neighborList.offsets();
const auto& neighbours = neighborList.neighbors();
const size_t mobileN = atoms.mobileCount();

for (size_t atomIndex = 0; atomIndex < atoms.mobileCount(); ++atomIndex) {
auto processAtom = [&](size_t atomIndex) {
const uint32_t begin = offsets[atomIndex];
const uint32_t end = offsets[atomIndex + 1];
const uint32_t end = offsets[atomIndex + 1];
if (begin > end || static_cast<size_t>(end) > neighbours.size()) {
continue;
return;
}

const float posX = atoms.posX(atomIndex);
const float posY = atoms.posY(atomIndex);
const float posZ = atoms.posZ(atomIndex);
float forceX = atoms.forceX(atomIndex);
float forceY = atoms.forceY(atomIndex);
float forceZ = atoms.forceZ(atomIndex);
float forceX = atoms.forceX(atomIndex);
float forceY = atoms.forceY(atomIndex);
float forceZ = atoms.forceZ(atomIndex);
float potentialEnergy = atoms.energy(atomIndex);

const LJForceField::LJPairRow* ljPairRow = nullptr;
Expand All @@ -36,7 +43,7 @@ namespace {
charge = atoms.charge(atomIndex);
if (charge == 0.0f) {
if constexpr (!UseLJ) {
continue;
return;
}
}
}
Expand All @@ -62,7 +69,16 @@ namespace {
atoms.forceY(atomIndex) = forceY;
atoms.forceZ(atomIndex) = forceZ;
atoms.energy(atomIndex) = potentialEnergy;
}
};

#ifdef ENABLE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0, mobileN, 64),
[&](const tbb::blocked_range<size_t>& r) {
for (size_t i = r.begin(); i != r.end(); ++i) processAtom(i);
});
#else
for (size_t i = 0; i < mobileN; ++i) processAtom(i);
#endif
}
}

Expand Down
10 changes: 7 additions & 3 deletions Engine/physics/integrators/KDKScheme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@ void KDKScheme::halfKick(AtomStorage& atomStorage, float accelDamping, float dt)

const size_t mobileCount = atomStorage.mobileCount();

const float halfDt = 0.5f * accelDamping * dt;
#pragma GCC ivdep
for (size_t i = 0; i < mobileCount; ++i) {
vx[i] += 0.5f * fx[i] * invMass[i] * accelDamping * dt;
vy[i] += 0.5f * fy[i] * invMass[i] * accelDamping * dt;
vz[i] += 0.5f * fz[i] * invMass[i] * accelDamping * dt;
const float halfDtInvMass = halfDt * invMass[i];
vx[i] += fx[i] * halfDtInvMass;
vy[i] += fy[i] * halfDtInvMass;
vz[i] += fz[i] * halfDtInvMass;
}
}

Expand All @@ -47,6 +50,7 @@ void KDKScheme::drift(AtomStorage& atomStorage, float dt) {
const float* RESTRICT vz = atomStorage.vzData();

const size_t mobileCount = atomStorage.mobileCount();
#pragma GCC ivdep
for (size_t i = 0; i < mobileCount; ++i) {
x[i] += vx[i] * dt;
y[i] += vy[i] * dt;
Expand Down
58 changes: 56 additions & 2 deletions Engine/physics/integrators/VerletScheme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#include "Engine/metrics/Profiler.h"
#include "Engine/physics/integrators/StepOps.h"

#ifdef ENABLE_TBB
#include <tbb/blocked_range.h>
#include <tbb/parallel_for.h>
#endif

void VerletScheme::pipeline(StepData& stepData) const {
PROFILE_SCOPE("VerletScheme::pipeline");
// Расчет новых позиций
Expand Down Expand Up @@ -30,12 +35,36 @@ void VerletScheme::predict(AtomStorage& atomStorage, float dt) {

const float* RESTRICT invMass = atomStorage.invMassData();

#ifdef ENABLE_TBB
// Parallel path: TBB splits [0, n) into sub-ranges and each task advances the
// positions of its own slice; slices are disjoint, so tasks never write the
// same element.
tbb::parallel_for(tbb::blocked_range<size_t>(0, n),
    [&](const tbb::blocked_range<size_t>& r) {
        const size_t begin = r.begin();
        // Re-base every array on the slice start so the inner loop is a plain
        // 0..len loop. The no-alias qualifier uses the project's RESTRICT
        // macro (as the outer pointers in this function do) instead of the
        // GCC/Clang-only `__restrict__` spelling, keeping the TBB build
        // portable across compilers.
        float* RESTRICT lx = x + begin;
        float* RESTRICT ly = y + begin;
        float* RESTRICT lz = z + begin;
        const float* RESTRICT lvx = vx + begin;
        const float* RESTRICT lvy = vy + begin;
        const float* RESTRICT lvz = vz + begin;
        const float* RESTRICT lfx = fx + begin;
        const float* RESTRICT lfy = fy + begin;
        const float* RESTRICT lfz = fz + begin;
        const float* RESTRICT lim = invMass + begin;
        const size_t len = r.end() - begin;
        // pos += (vel + force * invMass * dt / 2) * dt
#pragma GCC ivdep
        for (size_t i = 0; i < len; ++i) {
            lx[i] += (lvx[i] + lfx[i] * lim[i] * 0.5f * dt) * dt;
            ly[i] += (lvy[i] + lfy[i] * lim[i] * 0.5f * dt) * dt;
            lz[i] += (lvz[i] + lfz[i] * lim[i] * 0.5f * dt) * dt;
        }
    });
#else
// Serial path: same update expression as the parallel branch, over the whole range.
#pragma GCC ivdep
for (size_t i = 0; i < n; ++i) {
    x[i] += (vx[i] + fx[i] * invMass[i] * 0.5f * dt) * dt;
    y[i] += (vy[i] + fy[i] * invMass[i] * 0.5f * dt) * dt;
    z[i] += (vz[i] + fz[i] * invMass[i] * 0.5f * dt) * dt;
}
#endif
}

void VerletScheme::correct(AtomStorage& atomStorage, float accelDamping, float dt) {
Expand All @@ -56,12 +85,37 @@ void VerletScheme::correct(AtomStorage& atomStorage, float accelDamping, float d

const float* RESTRICT invMass = atomStorage.invMassData();

// Loop-invariant factor of the velocity update, hoisted out of both code paths.
const float halfDt = 0.5f * accelDamping * dt;
#ifdef ENABLE_TBB
// Parallel path: TBB splits [0, n) into sub-ranges and each task updates the
// velocities of its own slice; slices are disjoint, so tasks never write the
// same element.
tbb::parallel_for(tbb::blocked_range<size_t>(0, n),
    [&](const tbb::blocked_range<size_t>& r) {
        const size_t begin = r.begin();
        // Re-base every array on the slice start. The no-alias qualifier uses
        // the project's RESTRICT macro (as the outer pointers in this function
        // do) instead of the GCC/Clang-only `__restrict__` spelling.
        const float* RESTRICT lfx = fx + begin;
        const float* RESTRICT lfy = fy + begin;
        const float* RESTRICT lfz = fz + begin;
        const float* RESTRICT lpfx = pfx + begin;
        const float* RESTRICT lpfy = pfy + begin;
        const float* RESTRICT lpfz = pfz + begin;
        float* RESTRICT lvx = vx + begin;
        float* RESTRICT lvy = vy + begin;
        float* RESTRICT lvz = vz + begin;
        const float* RESTRICT lim = invMass + begin;
        const size_t len = r.end() - begin;
        // vel += (prevForce + force) * (accelDamping * dt / 2) * invMass
#pragma GCC ivdep
        for (size_t i = 0; i < len; ++i) {
            const float halfDtInvMass = halfDt * lim[i];
            lvx[i] += (lpfx[i] + lfx[i]) * halfDtInvMass;
            lvy[i] += (lpfy[i] + lfy[i]) * halfDtInvMass;
            lvz[i] += (lpfz[i] + lfz[i]) * halfDtInvMass;
        }
    });
#else
// Serial path: same update expression as the parallel branch, over the whole
// range. halfDtInvMass is declared once, built from the hoisted halfDt.
#pragma GCC ivdep
for (size_t i = 0; i < n; ++i) {
    const float halfDtInvMass = halfDt * invMass[i];
    vx[i] += (pfx[i] + fx[i]) * halfDtInvMass;
    vy[i] += (pfy[i] + fy[i]) * halfDtInvMass;
    vz[i] += (pfz[i] + fz[i]) * halfDtInvMass;
}
#endif
}