From dd20f2f8320a25ab4de14752bd1b81ec5932bc83 Mon Sep 17 00:00:00 2001 From: Aethdv Date: Fri, 9 Jan 2026 15:31:33 +0100 Subject: [PATCH 1/4] Optimize allocation, lockless accumulation, and tape flushing - **Startup**: Pre-scan FEN files to count lines. Reserve the `positions` and `results` vectors to the exact size upfront to prevent reallocations. - **Huge Pages**: Apply `madvise(MADV_HUGEPAGE)` to the reserved capacity. (measured no speedup) - **Concurrency**: Replace mutex-based gradient accumulation with per-thread buffers. - **Reduction**: Use the `std::barrier` completion function to sum thread gradients and apply optimizer steps, removing lock contention. - **Tape Management**: Process positions in chunks of 1024 to flush the autograd tape (`cleanup()`/`zero_grad()`) frequently. --- src/evaltune_main.cpp | 119 ++++++++++++++++++++++++++++++------------ src/util/mem.hpp | 7 +++ 2 files changed, 92 insertions(+), 34 deletions(-) diff --git a/src/evaltune_main.cpp b/src/evaltune_main.cpp index 6fc1a4bc..90062138 100644 --- a/src/evaltune_main.cpp +++ b/src/evaltune_main.cpp @@ -8,6 +8,7 @@ #include "tuning/optim.hpp" #include "tuning/value.hpp" +#include "util/mem.hpp" #include "util/pretty.hpp" #include "util/types.hpp" @@ -17,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -41,6 +41,39 @@ int main() { std::cout << "Running on " << thread_count << " threads\n"; + // Pre-pass: Count total lines to reserve memory + size_t total_positions_estimate = 0; + auto count_lines = [](const std::string& filename) -> size_t { + std::ifstream f(filename, std::ios::binary); + if (!f) { + return 0; + } + constexpr size_t buffer_size = 128 * 1024; + std::vector buffer(buffer_size); + size_t lines = 0; + while (f.read(buffer.data(), buffer_size) || f.gcount() > 0) { + lines += + static_cast(std::count(buffer.data(), buffer.data() + f.gcount(), '\n')); + if (!f) { + break; + } + } + return lines; + }; + + std::cout << "Counting positions..." 
<< std::endl; + for (const auto& filename : fenFiles) { + total_positions_estimate += count_lines(filename); + } + std::cout << "Estimated positions: " << total_positions_estimate << "\n"; + + positions.reserve(total_positions_estimate); + results.reserve(total_positions_estimate); + + // Huge pages optimization for dynamic arrays + enable_huge_pages(positions.data(), positions.capacity() * sizeof(Position)); + enable_huge_pages(results.data(), results.capacity() * sizeof(f64)); + for (const auto& filename : fenFiles) { std::ifstream fenFile(filename); if (!fenFile) { @@ -109,24 +142,40 @@ int main() { std::vector indices(positions.size()); // Initialize indices 1..N std::iota(indices.begin(), indices.end(), 0); + enable_huge_pages(indices.data(), indices.size() * sizeof(size_t)); + const size_t total_batches = (positions.size() + batch_size - 1) / batch_size; - // Shared gradient accumulator - Parameters batch_gradients = Parameters::zeros(parameter_count); + // Per-thread gradient buffers for lock-free accumulation + std::vector thread_grads(thread_count, Parameters::zeros(parameter_count)); + + // small enough to keep tape manageable, large enough for efficiency + const size_t micro_batch_size = 1024; - std::mutex mutex; + for (auto& tg : thread_grads) { + enable_huge_pages(tg.parameters.data(), tg.parameters.size() * sizeof(f64)); + enable_huge_pages(tg.pair_parameters.data(), tg.pair_parameters.size() * sizeof(f64x2)); + } std::barrier epoch_barrier{thread_count + 1}; std::barrier batch_barrier{thread_count + 1, [&]() noexcept { - // Single-thread optimizer update - optim.step(current_parameter_values, batch_gradients); - batch_gradients = Parameters::zeros(parameter_count); + // Reduce all thread gradients into thread_grads[0] + for (u32 i = 1; i < thread_count; ++i) { + thread_grads[0].accumulate(thread_grads[i]); + } + // Apply optimizer + optim.step(current_parameter_values, thread_grads[0]); }}; // Spawn worker threads for (u32 t = 0; t < thread_count; 
++t) { std::thread([&, t]() { - // Each thread uses its own Graph arena + // Pre-allocated buffers (reused across micro-batches) + std::vector outputs; + std::vector targets; + outputs.reserve(micro_batch_size); + targets.reserve(micro_batch_size); + for (int epoch = 0; epoch < epochs; ++epoch) { epoch_barrier.arrive_and_wait(); @@ -136,45 +185,47 @@ int main() { size_t batch_end = std::min(batch_start + batch_size, positions.size()); size_t this_batch_size = batch_end - batch_start; - size_t sub_size = (this_batch_size + thread_count - 1) / thread_count; - + size_t sub_size = (this_batch_size + thread_count - 1) / thread_count; size_t sub_start = batch_start + sub_size * t; size_t sub_end = std::min(sub_start + sub_size, batch_end); + // Clear thread-local gradients for this batch + auto& my_grads = thread_grads[t]; + std::fill(my_grads.parameters.begin(), my_grads.parameters.end(), 0.0); + for (auto& p : my_grads.pair_parameters) { + p = f64x2::zero(); + } + Graph::get().copy_parameter_values(current_parameter_values); - std::vector outputs; - std::vector targets; - outputs.reserve(sub_end - sub_start); - targets.reserve(sub_end - sub_start); + // Process micro-batches to keep tape small + for (size_t mb_start = sub_start; mb_start < sub_end; + mb_start += micro_batch_size) { + size_t mb_end = std::min(mb_start + micro_batch_size, sub_end); - // Forward - for (size_t j = sub_start; j < sub_end; ++j) { - size_t idx = indices[j]; + outputs.clear(); + targets.clear(); - auto y = results[idx]; - ValueHandle v = (evaluate_white_pov(positions[idx]) * K).sigmoid(); - outputs.push_back(v); - targets.push_back(y); - } + // Forward pass for this micro-batch + for (size_t j = mb_start; j < mb_end; ++j) { + size_t idx = indices[j]; + outputs.push_back((evaluate_white_pov(positions[idx]) * K).sigmoid()); + targets.push_back(results[idx]); + } - // Backward - ValueHandle loss = mse(outputs, targets) - * ValueHandle::create(1.0 / double(this_batch_size)); + // Backward pass + 
ValueHandle loss = mse(outputs, targets) + * ValueHandle::create(1.0 / double(this_batch_size)); - Graph::get().backward(); + Graph::get().backward(); - Parameters grads = Graph::get().get_all_parameter_gradients(); + // Accumulate to thread-local buffer (no lock needed) + my_grads.accumulate(Graph::get().get_all_parameter_gradients()); - // Accumulate - { - std::lock_guard guard(mutex); - batch_gradients.accumulate(grads); + Graph::get().cleanup(); + Graph::get().zero_grad(); } - Graph::get().cleanup(); - Graph::get().zero_grad(); - batch_barrier.arrive_and_wait(); } } diff --git a/src/util/mem.hpp b/src/util/mem.hpp index 13508c2a..2ea05e26 100644 --- a/src/util/mem.hpp +++ b/src/util/mem.hpp @@ -158,6 +158,13 @@ template void make_unique_for_overwrite_huge_page(Args&&...) = delete; +template +void enable_huge_pages(T* ptr, std::size_t size_bytes) { +#ifdef __linux__ + madvise(static_cast(ptr), size_bytes, MADV_HUGEPAGE); +#endif +} + // Prefetching utilities inline void prefetch(const void* ptr) { __builtin_prefetch(ptr); From 6c8828f7c947703b838e0ce7f69e43c9b485492d Mon Sep 17 00:00:00 2001 From: Aethdv Date: Tue, 13 Jan 2026 16:55:55 +0100 Subject: [PATCH 2/4] nits & warning fixes --- src/evaltune_main.cpp | 20 ++++++++++---------- src/tuning/graph.cpp | 16 ++++++++-------- src/util/mem.hpp | 2 +- tests/test_static_vector.cpp | 18 ++++++++++++++++-- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/evaltune_main.cpp b/src/evaltune_main.cpp index 90062138..ebeca7b1 100644 --- a/src/evaltune_main.cpp +++ b/src/evaltune_main.cpp @@ -29,6 +29,10 @@ using namespace Clockwork::Autograd; int main() { + // Todo: make these CLI-specifiable + const size_t batch_size = 16 * 16384; + const size_t micro_batch_size = 1024; + std::vector positions; std::vector results; @@ -71,8 +75,8 @@ int main() { results.reserve(total_positions_estimate); // Huge pages optimization for dynamic arrays - enable_huge_pages(positions.data(), positions.capacity() * 
sizeof(Position)); - enable_huge_pages(results.data(), results.capacity() * sizeof(f64)); + advise_huge_pages(positions.data(), positions.capacity() * sizeof(Position)); + advise_huge_pages(results.data(), results.capacity() * sizeof(f64)); for (const auto& filename : fenFiles) { std::ifstream fenFile(filename); @@ -135,26 +139,22 @@ int main() { #else const i32 epochs = 1000; #endif - const f64 K = 1.0 / 400; - const size_t batch_size = 16 * 16384; + const f64 K = 1.0 / 400; std::mt19937 rng(std::random_device{}()); std::vector indices(positions.size()); // Initialize indices 1..N std::iota(indices.begin(), indices.end(), 0); - enable_huge_pages(indices.data(), indices.size() * sizeof(size_t)); + advise_huge_pages(indices.data(), indices.size() * sizeof(size_t)); const size_t total_batches = (positions.size() + batch_size - 1) / batch_size; // Per-thread gradient buffers for lock-free accumulation std::vector thread_grads(thread_count, Parameters::zeros(parameter_count)); - // small enough to keep tape manageable, large enough for efficiency - const size_t micro_batch_size = 1024; - for (auto& tg : thread_grads) { - enable_huge_pages(tg.parameters.data(), tg.parameters.size() * sizeof(f64)); - enable_huge_pages(tg.pair_parameters.data(), tg.pair_parameters.size() * sizeof(f64x2)); + advise_huge_pages(tg.parameters.data(), tg.parameters.size() * sizeof(f64)); + advise_huge_pages(tg.pair_parameters.data(), tg.pair_parameters.size() * sizeof(f64x2)); } std::barrier epoch_barrier{thread_count + 1}; diff --git a/src/tuning/graph.cpp b/src/tuning/graph.cpp index 91ef6163..dfa738e6 100644 --- a/src/tuning/graph.cpp +++ b/src/tuning/graph.cpp @@ -427,10 +427,10 @@ void Graph::cleanup() { void Graph::zero_grad() { for (usize i = 0; i < m_global_param_count; ++i) { - m_values.grad(i) = 0.0; + m_values.grad(static_cast(i)) = 0.0; } for (usize i = 0; i < m_global_pair_count; ++i) { - m_pairs.grad(i) = f64x2::zero(); + m_pairs.grad(static_cast(i)) = f64x2::zero(); } } @@ 
-441,10 +441,10 @@ void Graph::copy_parameter_values(const Parameters& source) { std::terminate(); } for (usize i = 0; i < m_global_param_count; ++i) { - m_values.val(i) = source.parameters[i]; + m_values.val(static_cast(i)) = source.parameters[i]; } for (usize i = 0; i < m_global_pair_count; ++i) { - m_pairs.val(i) = source.pair_parameters[i]; + m_pairs.val(static_cast(i)) = source.pair_parameters[i]; } } @@ -453,10 +453,10 @@ Parameters Graph::get_all_parameter_values() const { p.parameters.reserve(m_global_param_count); p.pair_parameters.reserve(m_global_pair_count); for (usize i = 0; i < m_global_param_count; ++i) { - p.parameters.push_back(m_values.val(i)); + p.parameters.push_back(m_values.val(static_cast(i))); } for (usize i = 0; i < m_global_pair_count; ++i) { - p.pair_parameters.push_back(m_pairs.val(i)); + p.pair_parameters.push_back(m_pairs.val(static_cast(i))); } return p; } @@ -466,10 +466,10 @@ Parameters Graph::get_all_parameter_gradients() const { p.parameters.reserve(m_global_param_count); p.pair_parameters.reserve(m_global_pair_count); for (usize i = 0; i < m_global_param_count; ++i) { - p.parameters.push_back(m_values.grad(i)); + p.parameters.push_back(m_values.grad(static_cast(i))); } for (usize i = 0; i < m_global_pair_count; ++i) { - p.pair_parameters.push_back(m_pairs.grad(i)); + p.pair_parameters.push_back(m_pairs.grad(static_cast(i))); } return p; } diff --git a/src/util/mem.hpp b/src/util/mem.hpp index 2ea05e26..a7f8feae 100644 --- a/src/util/mem.hpp +++ b/src/util/mem.hpp @@ -159,7 +159,7 @@ void make_unique_for_overwrite_huge_page(Args&&...) 
= delete; template -void enable_huge_pages(T* ptr, std::size_t size_bytes) { +void advise_huge_pages(T* ptr, std::size_t size_bytes) { #ifdef __linux__ madvise(static_cast(ptr), size_bytes, MADV_HUGEPAGE); #endif diff --git a/tests/test_static_vector.cpp b/tests/test_static_vector.cpp index 5b5850d4..2c0f630b 100644 --- a/tests/test_static_vector.cpp +++ b/tests/test_static_vector.cpp @@ -72,8 +72,22 @@ void special_member_funs() { auto copy = vec; REQUIRE(ctr == 6); REQUIRE(copy == vec); - copy = copy; // NOLINT - copy = std::move(copy); // NOLINT +// Intentionally testing self-assign/move behavior - suppress warnings +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wself-assign-overloaded" + #pragma clang diagnostic ignored "-Wself-move" +#elif defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wself-move" +#endif + copy = copy; + copy = std::move(copy); +#if defined(__clang__) + #pragma clang diagnostic pop +#elif defined(__GNUC__) + #pragma GCC diagnostic pop +#endif REQUIRE(ctr == 6); REQUIRE(copy.end() - copy.begin() == 3); auto copy2 = std::move(copy); From ec52fd1b5d3890f7640a6186b1ffb1bfe07b8f7a Mon Sep 17 00:00:00 2001 From: TheRealGioviok <425gioviok@gmail.com> Date: Tue, 13 Jan 2026 22:28:37 +0100 Subject: [PATCH 3/4] Huge speedup by tuning the microbatch size --- src/evaltune_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaltune_main.cpp b/src/evaltune_main.cpp index ebeca7b1..d4d3a660 100644 --- a/src/evaltune_main.cpp +++ b/src/evaltune_main.cpp @@ -31,7 +31,7 @@ int main() { // Todo: make these CLI-specifiable const size_t batch_size = 16 * 16384; - const size_t micro_batch_size = 1024; + const size_t micro_batch_size = 160; std::vector positions; std::vector results; From 7bc17ef56bb366ac17e9c488b08b12bd0e960a68 Mon Sep 17 00:00:00 2001 From: TheRealGioviok <425gioviok@gmail.com> Date: Wed, 14 Jan 2026 00:09:24 +0100 Subject: [PATCH 4/4] 
Tuned values Bench: 13922193 --- src/eval_constants.hpp | 138 ++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/src/eval_constants.hpp b/src/eval_constants.hpp index a5e72a23..f6177930 100644 --- a/src/eval_constants.hpp +++ b/src/eval_constants.hpp @@ -5,11 +5,11 @@ namespace Clockwork { // clang-format off -inline const PParam PAWN_MAT = S(170, 156); -inline const PParam KNIGHT_MAT = S(512, 314); -inline const PParam BISHOP_MAT = S(453, 269); -inline const PParam ROOK_MAT = S(544, 412); -inline const PParam QUEEN_MAT = S(1080, 561); +inline const PParam PAWN_MAT = S(171, 156); +inline const PParam KNIGHT_MAT = S(517, 316); +inline const PParam BISHOP_MAT = S(473, 272); +inline const PParam ROOK_MAT = S(546, 411); +inline const PParam QUEEN_MAT = S(1082, 561); inline const PParam TEMPO_VAL = S(63, 14); inline const PParam BISHOP_PAIR_VAL = S(77, 163); @@ -28,68 +28,68 @@ inline const PParam PAWN_PUSH_THREAT_ROOK = S(38, 34); inline const PParam PAWN_PUSH_THREAT_QUEEN = S(70, -53); inline const std::array PAWN_PHALANX = { - S(22, 12), S(57, 33), S(73, 70), S(181, 153), S(481, 224), S(672, 676), + S(22, 12), S(57, 33), S(73, 70), S(181, 153), S(481, 225), S(668, 679), }; inline const std::array DEFENDED_PAWN = { S(70, 38), S(60, 33), S(73, 65), S(175, 117), S(593, -10), }; inline const std::array PASSED_PAWN = { - S(-81, -96), S(-81, -74), S(-54, 7), S(27, 76), S(102, 196), S(302, 277), + S(-80, -96), S(-80, -74), S(-54, 7), S(28, 76), S(102, 197), S(302, 277), }; inline const std::array DEFENDED_PASSED_PUSH = { S(43, -38), S(38, -6), S(29, 20), S(26, 66), S(97, 129), S(211, 209), }; inline const std::array BLOCKED_PASSED_PAWN = { - S(10, -33), S(-1, 11), S(-4, -19), S(-7, -39), S(-14, -89), S(-268, -106), + S(10, -33), S(-1, 11), S(-4, -19), S(-7, -40), S(-14, -89), S(-268, -106), }; inline const std::array FRIENDLY_KING_PASSED_PAWN_DISTANCE = { - S(0, 0), S(-3, 114), S(-16, 90), S(-9, 33), S(-1, 6), S(11, 6), 
S(57, 2), S(19, -6), + S(0, 0), S(-3, 114), S(-16, 90), S(-9, 33), S(-1, 5), S(11, 6), S(57, 2), S(19, -6), }; inline const std::array ENEMY_KING_PASSED_PAWN_DISTANCE = { - S(0, 0), S(-299, -9), S(-34, 23), S(-13, 50), S(34, 61), S(40, 80), S(48, 87), S(19, 84), + S(0, 0), S(-300, -9), S(-35, 23), S(-13, 50), S(34, 60), S(39, 80), S(48, 87), S(19, 84), }; inline const std::array KNIGHT_MOBILITY = { - S(12, -59), S(111, 94), S(168, 165), S(205, 193), S(252, 203), S(281, 236), S(317, 228), S(355, 230), S(400, 169), + S(15, -60), S(114, 94), S(171, 164), S(209, 193), S(255, 203), S(284, 235), S(320, 228), S(358, 229), S(404, 169), }; inline const std::array BISHOP_MOBILITY = { - S(-17, -121), S(53, 38), S(116, 80), S(144, 128), S(173, 157), S(191, 177), S(200, 193), S(218, 198), S(230, 209), S(250, 198), S(273, 187), S(332, 144), S(337, 142), S(408, 97), + S(-18, -122), S(53, 37), S(115, 79), S(143, 127), S(172, 156), S(190, 176), S(199, 192), S(217, 197), S(229, 208), S(249, 197), S(272, 187), S(331, 143), S(336, 142), S(407, 96), }; inline const std::array ROOK_MOBILITY = { - S(278, 99), S(201, 260), S(230, 280), S(246, 283), S(260, 293), S(265, 305), S(273, 313), S(284, 312), S(291, 317), S(302, 319), S(315, 318), S(324, 317), S(331, 316), S(350, 296), S(458, 201), + S(275, 98), S(198, 259), S(226, 279), S(243, 282), S(256, 292), S(262, 304), S(270, 312), S(281, 311), S(288, 316), S(299, 318), S(311, 317), S(321, 316), S(327, 315), S(347, 295), S(455, 200), }; inline const std::array QUEEN_MOBILITY = { - S(460, 121), S(525, 234), S(573, 270), S(601, 365), S(613, 424), S(630, 455), S(633, 497), S(642, 501), S(645, 525), S(651, 535), S(654, 548), S(660, 552), S(671, 540), S(676, 542), S(680, 539), S(684, 534), S(689, 527), S(688, 527), S(700, 506), S(719, 488), S(735, 467), S(750, 434), S(779, 414), S(899, 302), S(950, 248), S(944, 242), S(862, 289), S(961, 217), + S(457, 121), S(522, 233), S(570, 270), S(598, 364), S(609, 423), S(626, 454), S(630, 496), S(639, 501), 
S(642, 525), S(648, 534), S(651, 548), S(656, 551), S(668, 539), S(673, 542), S(677, 538), S(680, 534), S(686, 526), S(685, 526), S(697, 505), S(716, 487), S(731, 466), S(747, 433), S(775, 413), S(895, 301), S(946, 248), S(940, 241), S(861, 287), S(958, 217), }; inline const std::array KING_MOBILITY = { - S(659, -333), S(244, -188), S(121, -85), S(81, -36), S(30, -21), S(-26, 4), S(-74, 40), S(-126, 67), S(-165, 55), + S(657, -331), S(243, -187), S(120, -84), S(81, -36), S(29, -21), S(-27, 4), S(-74, 40), S(-127, 68), S(-165, 56), }; inline const std::array KNIGHT_KING_RING = { - S(228, 227), S(347, 180), S(450, 118), + S(225, 227), S(344, 180), S(447, 118), }; inline const std::array BISHOP_KING_RING = { - S(475, 373), S(292, 235), S(166, 68), + S(457, 372), S(281, 235), S(163, 70), }; inline const std::array ROOK_KING_RING = { - S(317, 319), S(444, 311), S(485, 319), S(587, 370), S(744, 329), + S(319, 321), S(446, 313), S(488, 321), S(589, 372), S(746, 331), }; inline const std::array QUEEN_KING_RING = { - S(870, 745), S(585, 608), S(329, 459), S(138, 254), S(78, -11), S(12, -295), + S(874, 747), S(589, 609), S(332, 459), S(142, 254), S(81, -12), S(15, -297), }; inline const std::array PT_INNER_RING_ATTACKS = { - S(-119, 61), S(27, -18), S(-223, -129), S(56, 38), S(-241, -153), + S(-119, 61), S(27, -18), S(-216, -128), S(56, 38), S(-242, -154), }; inline const std::array PT_OUTER_RING_ATTACKS = { S(-28, 22), S(-21, 20), S(-20, 15), S(-15, 9), S(-21, -12), }; inline const PParam PAWN_THREAT_KNIGHT = S(245, 60); -inline const PParam PAWN_THREAT_BISHOP = S(218, 116); +inline const PParam PAWN_THREAT_BISHOP = S(217, 116); inline const PParam PAWN_THREAT_ROOK = S(203, 100); -inline const PParam PAWN_THREAT_QUEEN = S(187, -45); +inline const PParam PAWN_THREAT_QUEEN = S(186, -45); inline const PParam KNIGHT_THREAT_BISHOP = S(118, 73); inline const PParam KNIGHT_THREAT_ROOK = S(256, 15); @@ -104,64 +104,64 @@ inline const std::array BISHOP_PAWNS = { }; inline const 
std::array PAWN_PSQT = { - S(295, 277), S(197, 347), S(309, 312), S(292, 250), S(344, 179), S(249, 255), S(204, 279), S(310, 229), // - S(159, 173), S(210, 218), S(200, 165), S(164, 130), S(147, 100), S(117, 140), S(96, 179), S(61, 185), // - S(101, 138), S(100, 157), S(113, 116), S(99, 108), S(84, 96), S(54, 110), S(25, 143), S(10, 161), // - S(88, 93), S(101, 123), S(106, 96), S(88, 99), S(59, 93), S(41, 100), S(-8, 142), S(-22, 128), // - S(86, 68), S(146, 73), S(92, 119), S(76, 119), S(44, 113), S(13, 112), S(-11, 127), S(-27, 113), // - S(85, 75), S(210, 84), S(151, 129), S(105, 146), S(74, 131), S(50, 117), S(30, 136), S(-8, 129), // + S(294, 277), S(196, 348), S(309, 313), S(292, 250), S(343, 180), S(249, 255), S(203, 279), S(309, 230), // + S(158, 173), S(209, 218), S(199, 165), S(163, 130), S(146, 100), S(116, 140), S(95, 179), S(60, 185), // + S(100, 138), S(99, 157), S(112, 116), S(98, 108), S(83, 96), S(53, 110), S(24, 143), S(9, 161), // + S(87, 93), S(100, 123), S(105, 96), S(87, 99), S(58, 93), S(40, 100), S(-9, 142), S(-23, 128), // + S(85, 68), S(145, 73), S(91, 119), S(75, 119), S(43, 113), S(12, 112), S(-12, 127), S(-28, 113), // + S(83, 75), S(209, 84), S(150, 129), S(104, 146), S(73, 131), S(49, 117), S(29, 136), S(-9, 129), // }; inline const std::array KNIGHT_PSQT = { - S(-283, -47), S(-219, 186), S(-456, 438), S(94, 149), S(22, 155), S(-109, 133), S(-371, 196), S(-338, 31), // - S(112, 122), S(163, 122), S(244, 82), S(219, 118), S(216, 130), S(149, 123), S(132, 131), S(105, 105), // - S(181, 92), S(194, 139), S(251, 152), S(212, 171), S(186, 176), S(129, 186), S(140, 125), S(110, 120), // - S(240, 133), S(224, 151), S(227, 177), S(202, 210), S(219, 199), S(192, 184), S(194, 139), S(191, 114), // - S(232, 118), S(267, 108), S(244, 152), S(229, 175), S(216, 173), S(218, 162), S(219, 111), S(181, 112), // - S(178, 92), S(207, 89), S(190, 126), S(211, 150), S(212, 147), S(162, 134), S(162, 99), S(128, 82), // - S(188, 86), S(203, 72), S(185, 
87), S(183, 108), S(170, 114), S(149, 69), S(155, 82), S(115, 17), // - S(132, 64), S(166, 122), S(191, 75), S(208, 63), S(193, 82), S(143, 63), S(133, 99), S(83, 36), // + S(-288, -49), S(-224, 184), S(-461, 436), S(89, 146), S(17, 153), S(-113, 131), S(-375, 194), S(-343, 30), // + S(107, 119), S(159, 120), S(239, 81), S(214, 116), S(211, 128), S(145, 121), S(127, 129), S(101, 103), // + S(176, 90), S(189, 137), S(246, 151), S(207, 169), S(181, 174), S(124, 184), S(135, 123), S(105, 118), // + S(235, 131), S(219, 149), S(222, 175), S(197, 208), S(215, 196), S(188, 181), S(189, 137), S(186, 112), // + S(228, 116), S(263, 106), S(239, 150), S(225, 173), S(211, 171), S(213, 160), S(214, 109), S(176, 110), // + S(173, 90), S(203, 87), S(185, 124), S(206, 148), S(208, 144), S(157, 132), S(157, 96), S(123, 79), // + S(184, 84), S(199, 70), S(180, 85), S(178, 106), S(165, 112), S(145, 67), S(150, 79), S(110, 15), // + S(127, 62), S(161, 120), S(186, 73), S(203, 61), S(188, 80), S(138, 61), S(128, 96), S(78, 34), // }; inline const std::array BISHOP_PSQT = { - S(-33, 185), S(-68, 169), S(-358, 242), S(-222, 209), S(-189, 219), S(-290, 253), S(-104, 220), S(-29, 195), // - S(77, 115), S(25, 175), S(59, 133), S(-5, 181), S(-20, 187), S(39, 162), S(68, 144), S(51, 135), // - S(145, 127), S(171, 125), S(167, 157), S(129, 150), S(100, 147), S(98, 156), S(144, 133), S(118, 113), // - S(124, 108), S(143, 126), S(163, 139), S(143, 177), S(172, 163), S(112, 148), S(119, 122), S(90, 112), // - S(140, 82), S(150, 110), S(160, 129), S(167, 151), S(156, 159), S(121, 149), S(85, 130), S(91, 97), // - S(158, 87), S(193, 93), S(211, 108), S(160, 145), S(151, 131), S(157, 127), S(166, 103), S(105, 109), // - S(142, 39), S(226, 57), S(189, 70), S(156, 97), S(138, 107), S(146, 80), S(147, 86), S(136, 46), // - S(148, 49), S(134, 98), S(139, 121), S(158, 77), S(166, 65), S(158, 124), S(152, 80), S(131, 72), // + S(-34, 184), S(-69, 168), S(-360, 241), S(-223, 208), S(-190, 217), S(-291, 
252), S(-105, 219), S(-30, 194), // + S(76, 114), S(24, 174), S(58, 132), S(-6, 180), S(-21, 186), S(38, 161), S(67, 144), S(50, 134), // + S(143, 126), S(170, 124), S(166, 156), S(128, 149), S(99, 146), S(97, 155), S(143, 132), S(117, 113), // + S(122, 107), S(142, 125), S(162, 138), S(142, 176), S(171, 163), S(111, 147), S(118, 121), S(89, 112), // + S(139, 81), S(149, 109), S(159, 128), S(166, 150), S(155, 158), S(120, 148), S(84, 129), S(90, 96), // + S(157, 86), S(192, 92), S(210, 107), S(158, 144), S(150, 130), S(156, 126), S(165, 102), S(104, 108), // + S(141, 38), S(225, 56), S(188, 69), S(155, 96), S(137, 106), S(145, 79), S(146, 85), S(135, 45), // + S(147, 48), S(133, 97), S(138, 120), S(157, 76), S(165, 64), S(156, 123), S(151, 79), S(130, 71), // }; inline const std::array ROOK_PSQT = { - S(397, 245), S(433, 254), S(394, 277), S(404, 260), S(400, 259), S(348, 270), S(363, 272), S(360, 275), // - S(291, 301), S(354, 291), S(445, 265), S(380, 288), S(382, 289), S(343, 294), S(261, 323), S(260, 323), // - S(278, 287), S(405, 254), S(437, 246), S(420, 244), S(381, 254), S(333, 278), S(333, 279), S(254, 312), // - S(256, 274), S(332, 274), S(372, 258), S(352, 255), S(347, 264), S(305, 283), S(285, 284), S(226, 297), // - S(214, 229), S(285, 239), S(265, 257), S(262, 245), S(253, 254), S(238, 281), S(208, 272), S(183, 269), // - S(182, 203), S(246, 200), S(249, 223), S(236, 215), S(240, 209), S(212, 246), S(203, 224), S(162, 238), // - S(94, 219), S(211, 161), S(232, 183), S(241, 186), S(233, 187), S(219, 201), S(197, 184), S(170, 200), // - S(144, 207), S(161, 228), S(234, 188), S(261, 172), S(241, 190), S(229, 205), S(214, 197), S(194, 221), // + S(399, 246), S(435, 254), S(396, 278), S(406, 261), S(402, 260), S(351, 271), S(365, 273), S(362, 276), // + S(293, 302), S(356, 292), S(447, 266), S(382, 289), S(385, 290), S(345, 295), S(264, 324), S(262, 324), // + S(280, 288), S(407, 255), S(439, 247), S(423, 245), S(383, 254), S(335, 279), S(336, 280), S(256, 
313), // + S(258, 275), S(334, 275), S(374, 259), S(354, 256), S(349, 264), S(307, 284), S(288, 285), S(228, 298), // + S(216, 230), S(287, 240), S(267, 258), S(265, 245), S(255, 255), S(241, 282), S(210, 273), S(185, 270), // + S(184, 204), S(248, 201), S(252, 224), S(238, 216), S(243, 210), S(214, 247), S(205, 225), S(164, 239), // + S(96, 220), S(213, 163), S(235, 184), S(243, 187), S(235, 188), S(222, 202), S(200, 185), S(172, 201), // + S(146, 208), S(163, 229), S(236, 189), S(263, 173), S(243, 191), S(231, 206), S(216, 198), S(197, 222), // }; inline const std::array QUEEN_PSQT = { - S(461, 412), S(539, 348), S(528, 381), S(446, 485), S(461, 447), S(449, 442), S(486, 373), S(388, 448), // - S(483, 441), S(426, 546), S(430, 558), S(287, 621), S(293, 613), S(363, 568), S(392, 484), S(412, 462), // - S(421, 498), S(512, 485), S(439, 558), S(406, 577), S(359, 580), S(376, 533), S(450, 435), S(427, 407), // - S(497, 395), S(492, 465), S(452, 508), S(428, 560), S(417, 563), S(419, 497), S(474, 402), S(471, 370), // - S(480, 399), S(489, 418), S(465, 468), S(437, 512), S(439, 508), S(436, 469), S(456, 398), S(467, 350), // - S(467, 318), S(501, 351), S(503, 420), S(467, 401), S(463, 394), S(471, 416), S(476, 358), S(462, 332), // - S(449, 200), S(492, 153), S(495, 243), S(506, 291), S(488, 318), S(488, 287), S(459, 337), S(459, 323), // - S(407, 241), S(457, 42), S(457, 68), S(486, 159), S(493, 234), S(494, 186), S(484, 218), S(436, 267), // + S(462, 412), S(540, 347), S(530, 380), S(448, 484), S(462, 447), S(451, 442), S(488, 372), S(389, 447), // + S(484, 441), S(427, 545), S(432, 557), S(289, 620), S(295, 613), S(364, 567), S(393, 484), S(414, 461), // + S(422, 497), S(514, 485), S(440, 557), S(407, 576), S(360, 579), S(377, 532), S(452, 435), S(428, 406), // + S(498, 395), S(494, 464), S(454, 508), S(430, 559), S(419, 562), S(421, 496), S(475, 402), S(473, 369), // + S(481, 398), S(491, 417), S(466, 467), S(438, 511), S(441, 507), S(438, 469), S(458, 398), 
S(469, 349), // + S(469, 318), S(503, 350), S(504, 420), S(469, 401), S(465, 393), S(472, 415), S(477, 358), S(463, 332), // + S(450, 199), S(493, 152), S(496, 242), S(508, 290), S(490, 318), S(490, 286), S(460, 336), S(461, 322), // + S(408, 240), S(458, 41), S(459, 67), S(487, 158), S(495, 233), S(496, 185), S(485, 217), S(437, 266), // }; inline const std::array KING_PSQT = { - S(-606, -153), S(-133, 153), S(57, 71), S(-135, 39), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(21, -42), S(173, 58), S(172, 68), S(139, -12), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-43, 86), S(209, 61), S(227, 53), S(122, 18), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-294, 101), S(131, 35), S(63, 45), S(56, 33), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-299, 68), S(73, 1), S(93, 4), S(9, 38), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-154, 35), S(159, -34), S(118, -7), S(89, 11), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-5, -26), S(179, -57), S(133, -29), S(61, 7), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // - S(-190, -17), S(-18, -21), S(-108, 10), S(-111, -4), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-603, -152), S(-132, 154), S(58, 72), S(-134, 40), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(23, -42), S(174, 59), S(173, 68), S(141, -12), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-42, 87), S(210, 62), S(228, 54), S(123, 19), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-293, 102), S(132, 36), S(63, 46), S(56, 34), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-298, 68), S(74, 2), S(94, 5), S(10, 39), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-153, 36), S(160, -34), S(119, -7), S(90, 12), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-4, -25), S(180, -56), S(134, -28), S(62, 7), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // + S(-189, -17), S(-18, -21), S(-107, 11), S(-110, -4), S(0, 0), S(0, 0), S(0, 0), S(0, 0), // }; -// Epoch duration: 8.68854s +// Epoch duration: 4.78127s // clang-format on } // namespace Clockwork