ElectroZybr · kranks-uga · May 25, 2026 · May 28, 2026 · May 31, 2026 · May 31, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -11,6 +11,7 @@ set(APP_VERSION_STRING "${PROJECT_VERSION}")
 option(OPTIMIZE_FOR_NATIVE "Enable native CPU optimizations outside Debug builds" OFF)
 option(BUILD_BENCHMARKS "Build benchmarks" OFF)
 option(ENABLE_IPO "Enable link-time optimization for non-Debug builds when supported" ON)
+option(ENABLE_TBB "Enable Intel oneAPI TBB multithreading for physics simulation" ON)
 
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -68,6 +69,12 @@ add_dependencies(latticelab_lib wgsl_shaders)
 include(embedded_translate)
 add_dependencies(latticelab_lib embedded_translate)
 
+if(ENABLE_TBB)
+    find_package(TBB REQUIRED)
+    target_link_libraries(latticelab_lib PUBLIC TBB::tbb)
+    target_compile_definitions(latticelab_lib PUBLIC ENABLE_TBB)
+    message(STATUS "TBB: enabled")
+endif()
 target_compile_options(latticelab_lib PRIVATE
     $<$<AND:$<BOOL:${OPTIMIZE_FOR_NATIVE}>,$<NOT:$<CONFIG:Debug>>,$<CXX_COMPILER_ID:GNU,Clang,AppleClang>>:-march=native>
 )
@@ -86,8 +93,6 @@ target_include_directories(latticelab_lib PUBLIC
     ${webgpu_cpp_SOURCE_DIR}/wgpu-native
 )
 
-
-
 if(APPLE)
     find_library(COCOA_FRAMEWORK Cocoa)
     find_library(METAL_FRAMEWORK Metal)
@@ -105,9 +110,7 @@ if(APPLE)
         ${METAL_FRAMEWORK}
         ${QUARTZCORE_FRAMEWORK}
     )
-endif ()
-
-
+endif()
 target_link_libraries(latticelab_lib PUBLIC
     glfw
     imgui ImGuiFileDialog_lib

diff --git a/Engine/NeighborSearch/NeighborList.cpp b/Engine/NeighborSearch/NeighborList.cpp
@@ -11,6 +11,11 @@
 #include "Engine/physics/AtomStorage.h"
 #include "Engine/restrict.h"
 
+#ifdef ENABLE_TBB
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_for.h>
+#endif
+
 void NeighborList::setCutoff(float cutoff) {
     cutoff_ = cutoff;
     listRadius_ = cutoff_ + skin_;
@@ -69,15 +74,39 @@ void NeighborList::build(const AtomStorage& atoms, World& box) {
 
     reserveListBuffers(atoms);
 
+    // Фаза 1: каждый поток строит список соседей своих атомов независимо
+    std::vector<std::vector<uint32_t>> perAtom(atomCount);
+#ifdef ENABLE_TBB
+    tbb::parallel_for(tbb::blocked_range<uint32_t>(0, atomCount, 64),
+        [&](const tbb::blocked_range<uint32_t>& r) {
+            for (uint32_t i = r.begin(); i != r.end(); ++i)
+                writeAtomNeighbors(grid, x, y, z, i, x[i], y[i], z[i], perAtom[i]);
+        });
+#else
+    for (uint32_t i = 0; i < atomCount; ++i) {
+        writeAtomNeighbors(grid, x, y, z, i, x[i], y[i], z[i], perAtom[i]);
+    }
+#endif
+
+    // Фаза 2: вычисляем смещения (prefix sum) — последовательно
     offsets_[0] = 0;
     for (uint32_t i = 0; i < atomCount; ++i) {
-        const float xi = x[i];
-        const float yi = y[i];
-        const float zi = z[i];
-        // запись всех соседей атома в массив
-        writeAtomNeighbors(grid, x, y, z, i, xi, yi, zi, neighbors_);
-        offsets_[i + 1] = neighbors_.size();
+        offsets_[i + 1] = offsets_[i] + static_cast<uint32_t>(perAtom[i].size());
+    }
+    neighbors_.resize(offsets_[atomCount]);
+
+    // Фаза 3: копируем в плоский массив — снова параллельно
+#ifdef ENABLE_TBB
+    tbb::parallel_for(tbb::blocked_range<uint32_t>(0, atomCount),
+        [&](const tbb::blocked_range<uint32_t>& r) {
+            for (uint32_t i = r.begin(); i != r.end(); ++i)
+                std::copy(perAtom[i].begin(), perAtom[i].end(), neighbors_.begin() + offsets_[i]);
+        });
+#else
+    for (uint32_t i = 0; i < atomCount; ++i) {
+        std::copy(perAtom[i].begin(), perAtom[i].end(), neighbors_.begin() + offsets_[i]);
     }
+#endif
 
     std::copy(x, x + atoms.mobileCount(), refPosX_.data());
     std::copy(y, y + atoms.mobileCount(), refPosY_.data());

diff --git a/Engine/physics/ForceField.cpp b/Engine/physics/ForceField.cpp
@@ -1,29 +1,36 @@
 #include "ForceField.h"
 
 #include "Engine/NeighborSearch/NeighborList.h"
+#include "Engine/World.h"
 #include "Engine/metrics/Profiler.h"
 #include "Engine/physics/AtomStorage.h"
 
+#ifdef ENABLE_TBB
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_for.h>
+#endif
+
 namespace {
     template <bool UseLJ, bool UseCoulomb>
     void computePairInteractionsImpl(AtomStorage& atoms, const NeighborList& neighborList, const LJForceField& ljForceField,
                                      const CoulombForceField& coulombForceField) {
-        const auto& offsets = neighborList.offsets();
+        const auto& offsets    = neighborList.offsets();
         const auto& neighbours = neighborList.neighbors();
+        const size_t mobileN   = atoms.mobileCount();
 
-        for (size_t atomIndex = 0; atomIndex < atoms.mobileCount(); ++atomIndex) {
+        auto processAtom = [&](size_t atomIndex) {
             const uint32_t begin = offsets[atomIndex];
-            const uint32_t end = offsets[atomIndex + 1];
+            const uint32_t end   = offsets[atomIndex + 1];
             if (begin > end || static_cast<size_t>(end) > neighbours.size()) {
-                continue;
+                return;
             }
 
             const float posX = atoms.posX(atomIndex);
             const float posY = atoms.posY(atomIndex);
             const float posZ = atoms.posZ(atomIndex);
-            float forceX = atoms.forceX(atomIndex);
-            float forceY = atoms.forceY(atomIndex);
-            float forceZ = atoms.forceZ(atomIndex);
+            float forceX         = atoms.forceX(atomIndex);
+            float forceY         = atoms.forceY(atomIndex);
+            float forceZ         = atoms.forceZ(atomIndex);
             float potentialEnergy = atoms.energy(atomIndex);
 
             const LJForceField::LJPairRow* ljPairRow = nullptr;
@@ -36,7 +43,7 @@ namespace {
                 charge = atoms.charge(atomIndex);
                 if (charge == 0.0f) {
                     if constexpr (!UseLJ) {
-                        continue;
+                        return;
                     }
                 }
             }
@@ -62,7 +69,16 @@ namespace {
             atoms.forceY(atomIndex) = forceY;
             atoms.forceZ(atomIndex) = forceZ;
             atoms.energy(atomIndex) = potentialEnergy;
-        }
+        };
+
+#ifdef ENABLE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, mobileN, 64),
+            [&](const tbb::blocked_range<size_t>& r) {
+                for (size_t i = r.begin(); i != r.end(); ++i) processAtom(i);
+            });
+#else
+        for (size_t i = 0; i < mobileN; ++i) processAtom(i);
+#endif
     }
 }
 

diff --git a/Engine/physics/integrators/KDKScheme.cpp b/Engine/physics/integrators/KDKScheme.cpp
@@ -29,10 +29,13 @@ void KDKScheme::halfKick(AtomStorage& atomStorage, float accelDamping, float dt)
 
     const size_t mobileCount = atomStorage.mobileCount();
 
+    const float halfDt = 0.5f * accelDamping * dt;
+#pragma GCC ivdep
     for (size_t i = 0; i < mobileCount; ++i) {
-        vx[i] += 0.5f * fx[i] * invMass[i] * accelDamping * dt;
-        vy[i] += 0.5f * fy[i] * invMass[i] * accelDamping * dt;
-        vz[i] += 0.5f * fz[i] * invMass[i] * accelDamping * dt;
+        const float halfDtInvMass = halfDt * invMass[i];
+        vx[i] += fx[i] * halfDtInvMass;
+        vy[i] += fy[i] * halfDtInvMass;
+        vz[i] += fz[i] * halfDtInvMass;
     }
 }
 
@@ -47,6 +50,7 @@ void KDKScheme::drift(AtomStorage& atomStorage, float dt) {
     const float* RESTRICT vz = atomStorage.vzData();
 
     const size_t mobileCount = atomStorage.mobileCount();
+#pragma GCC ivdep
     for (size_t i = 0; i < mobileCount; ++i) {
         x[i] += vx[i] * dt;
         y[i] += vy[i] * dt;

diff --git a/Engine/physics/integrators/VerletScheme.cpp b/Engine/physics/integrators/VerletScheme.cpp
@@ -3,6 +3,11 @@
 #include "Engine/metrics/Profiler.h"
 #include "Engine/physics/integrators/StepOps.h"
 
+#ifdef ENABLE_TBB
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_for.h>
+#endif
+
 void VerletScheme::pipeline(StepData& stepData) const {
     PROFILE_SCOPE("VerletScheme::pipeline");
     // Расчет новых позиций
@@ -30,12 +35,36 @@ void VerletScheme::predict(AtomStorage& atomStorage, float dt) {
 
     const float* RESTRICT invMass = atomStorage.invMassData();
 
+#ifdef ENABLE_TBB
+    tbb::parallel_for(tbb::blocked_range<size_t>(0, n),
+        [&](const tbb::blocked_range<size_t>& r) {
+            const size_t begin = r.begin();
+            float* __restrict__ lx = x + begin;
+            float* __restrict__ ly = y + begin;
+            float* __restrict__ lz = z + begin;
+            const float* __restrict__ lvx = vx + begin;
+            const float* __restrict__ lvy = vy + begin;
+            const float* __restrict__ lvz = vz + begin;
+            const float* __restrict__ lfx = fx + begin;
+            const float* __restrict__ lfy = fy + begin;
+            const float* __restrict__ lfz = fz + begin;
+            const float* __restrict__ lim = invMass + begin;
+            const size_t len = r.end() - begin;
+#pragma GCC ivdep
+            for (size_t i = 0; i < len; ++i) {
+                lx[i] += (lvx[i] + lfx[i] * lim[i] * 0.5f * dt) * dt;
+                ly[i] += (lvy[i] + lfy[i] * lim[i] * 0.5f * dt) * dt;
+                lz[i] += (lvz[i] + lfz[i] * lim[i] * 0.5f * dt) * dt;
+            }
+        });
+#else
 #pragma GCC ivdep
     for (size_t i = 0; i < n; ++i) {
         x[i] += (vx[i] + fx[i] * invMass[i] * 0.5f * dt) * dt;
         y[i] += (vy[i] + fy[i] * invMass[i] * 0.5f * dt) * dt;
         z[i] += (vz[i] + fz[i] * invMass[i] * 0.5f * dt) * dt;
     }
+#endif
 }
 
 void VerletScheme::correct(AtomStorage& atomStorage, float accelDamping, float dt) {
@@ -56,12 +85,37 @@ void VerletScheme::correct(AtomStorage& atomStorage, float accelDamping, float d
 
     const float* RESTRICT invMass = atomStorage.invMassData();
 
+    const float halfDt = 0.5f * accelDamping * dt;
+#ifdef ENABLE_TBB
+    tbb::parallel_for(tbb::blocked_range<size_t>(0, n),
+        [&](const tbb::blocked_range<size_t>& r) {
+            const size_t begin = r.begin();
+            const float* __restrict__ lfx = fx + begin;
+            const float* __restrict__ lfy = fy + begin;
+            const float* __restrict__ lfz = fz + begin;
+            const float* __restrict__ lpfx = pfx + begin;
+            const float* __restrict__ lpfy = pfy + begin;
+            const float* __restrict__ lpfz = pfz + begin;
+            float* __restrict__ lvx = vx + begin;
+            float* __restrict__ lvy = vy + begin;
+            float* __restrict__ lvz = vz + begin;
+            const float* __restrict__ lim = invMass + begin;
+            const size_t len = r.end() - begin;
+#pragma GCC ivdep
+            for (size_t i = 0; i < len; ++i) {
+                const float halfDtInvMass = halfDt * lim[i];
+                lvx[i] += (lpfx[i] + lfx[i]) * halfDtInvMass;
+                lvy[i] += (lpfy[i] + lfy[i]) * halfDtInvMass;
+                lvz[i] += (lpfz[i] + lfz[i]) * halfDtInvMass;
+            }
+        });
+#else
 #pragma GCC ivdep
     for (size_t i = 0; i < n; ++i) {
-        const float halfDtInvMass = 0.5f * accelDamping * dt * invMass[i];
-
+        const float halfDtInvMass = halfDt * invMass[i];
         vx[i] += (pfx[i] + fx[i]) * halfDtInvMass;
         vy[i] += (pfy[i] + fy[i]) * halfDtInvMass;
         vz[i] += (pfz[i] + fz[i]) * halfDtInvMass;
     }
+#endif
 }