From c857e1251e68847d14a72c802d7575cbd1e16ccc Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:17 +0100
Subject: [PATCH 01/48] Fix OpenMP typo

---
 include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
index ee4f6f15e..61729d9b8 100644
--- a/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp
@@ -6,7 +6,7 @@
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
  *
- * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the CUDA backend.
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the OpenMP backend.
  */
 
 #ifndef PLSSVM_BACKENDS_OPENMP_KERNEL_CG_EXPLICIT_BLAS_HPP_

From 124a779e71a375dc7a2dccba97deaf5a2550ae1f Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:17 +0100
Subject: [PATCH 02/48] Initial commit HPX backend

---
 include/plssvm/backends/HPX/csvm.hpp          | 177 ++++++++++++
 .../plssvm/backends/HPX/detail/utility.hpp    |  33 +++
 include/plssvm/backends/HPX/exceptions.hpp    |  39 +++
 .../backends/HPX/kernel/cg_explicit/blas.hpp  | 109 ++++++++
 .../cg_explicit/kernel_matrix_assembly.hpp    | 118 ++++++++
 .../kernel_matrix_assembly_blas.hpp           | 140 ++++++++++
 .../backends/HPX/kernel/kernel_functions.hpp  | 159 +++++++++++
 .../backends/HPX/kernel/predict_kernel.hpp    | 251 ++++++++++++++++++
 8 files changed, 1026 insertions(+)
 create mode 100644 include/plssvm/backends/HPX/csvm.hpp
 create mode 100644 include/plssvm/backends/HPX/detail/utility.hpp
 create mode 100644 include/plssvm/backends/HPX/exceptions.hpp
 create mode 100644 include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
 create mode 100644 include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
 create mode 100644 include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
 create mode 100644 include/plssvm/backends/HPX/kernel/kernel_functions.hpp
 create mode 100644 include/plssvm/backends/HPX/kernel/predict_kernel.hpp

diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp
new file mode 100644
index 000000000..a2867ac1e
--- /dev/null
+++ b/include/plssvm/backends/HPX/csvm.hpp
@@ -0,0 +1,177 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#define PLSSVM_BACKENDS_HPX_CSVM_HPP_
+#pragma once
+
+#include "plssvm/backends/hpx/implementation_types.hpp"  // plssvm::hpx::implementation_type
+#include "plssvm/constants.hpp"                             // plssvm::real_type
+#include "plssvm/csvm.hpp"                                  // plssvm::csvm, plssvm::detail::csvm_backend_exists
+#include "plssvm/detail/memory_size.hpp"                    // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp"                  // plssvm::detail::move_only_any
+#include "plssvm/detail/type_traits.hpp"                    // PLSSVM_REQUIRES
+#include "plssvm/matrix.hpp"                                // plssvm::aos_matrix
+#include "plssvm/parameter.hpp"                             // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v
+#include "plssvm/solver_types.hpp"                          // plssvm::solver_type
+#include "plssvm/target_platforms.hpp"                      // plssvm::target_platform
+
+#include <cstddef>      // std::size_t
+#include <type_traits>  // std::true_type
+#include <utility>      // std::forward, std::pair
+#include <vector>       // std::vector
+
+namespace plssvm {
+
+namespace hpx {
+
+/**
+ * @brief A C-SVM implementation using hpx as backend.
+ */
+class csvm : public ::plssvm::csvm {
+  public:
+    /**
+     * @brief Construct a new C-SVM using the HPX backend with the parameters given through @p params.
+     * @param[in] params struct encapsulating all possible SVM parameters
+     * @throws plssvm::exception all exceptions thrown in the base class constructor
+     * @throws plssvm::hpx::backend_exception if the requested target is not available
+     * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+     */
+    explicit csvm(parameter params = {});
+    /**
+     * @brief Construct a new C-SVM using the hpx backend on the @p target platform with the parameters given through @p params.
+     * @param[in] target the target platform used for this C-SVM
+     * @param[in] params struct encapsulating all possible SVM parameters
+     * @throws plssvm::exception all exceptions thrown in the base class constructor
+     * @throws plssvm::hpx::backend_exception if the requested target is not available
+     * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+     */
+    explicit csvm(target_platform target, parameter params = {});
+
+    /**
+     * @brief Construct a new C-SVM using the HPX backend and the optionally provided @p named_args.
+     * @param[in] named_args the additional optional named-parameters
+     * @throws plssvm::exception all exceptions thrown in the base class constructor
+     * @throws plssvm::hpx::backend_exception if the requested target is not available
+     * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+     */
+    template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+    explicit csvm(Args &&...named_args) :
+        ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+        // the default target is the automatic one
+        this->init(plssvm::target_platform::automatic);
+    }
+
+    /**
+     * @brief Construct a new C-SVM using the HPX backend on the @p target platform and the optionally provided @p named_args.
+     * @param[in] target the target platform used for this C-SVM
+     * @param[in] named_args the additional optional named-parameters
+     * @throws plssvm::exception all exceptions thrown in the base class constructor
+     * @throws plssvm::hpx::backend_exception if the requested target is not available
+     * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+     */
+    template <typename... Args, PLSSVM_REQUIRES(::plssvm::detail::has_only_parameter_named_args_v<Args...>)>
+    explicit csvm(const target_platform target, Args &&...named_args) :
+        ::plssvm::csvm{ std::forward<Args>(named_args)... } {
+        this->init(target);
+    }
+
+    /**
+     * @copydoc plssvm::csvm::csvm(const plssvm::csvm &)
+     */
+    csvm(const csvm &) = delete;
+    /**
+     * @copydoc plssvm::csvm::csvm(plssvm::csvm &&) noexcept
+     */
+    csvm(csvm &&) noexcept = default;
+    /**
+     * @copydoc plssvm::csvm::operator=(const plssvm::csvm &)
+     */
+    csvm &operator=(const csvm &) = delete;
+    /**
+     * @copydoc plssvm::csvm::operator=(plssvm::csvm &&) noexcept
+     */
+    csvm &operator=(csvm &&) noexcept = default;
+    /**
+     * @brief Default destructor since the copy and move constructors and copy- and move-assignment operators are defined.
+     */
+    ~csvm() override = default;
+
+    /**
+     * @copydoc plssvm::csvm::num_available_devices
+     * @note We currently only support one device for C++ standard parallelism.
+     */
+    [[nodiscard]] std::size_t num_available_devices() const noexcept override {
+        return 1;
+    }
+
+    /**
+     * @brief Return the hpx implementation type.
+     * @return the hpx implementation type (`[[nodiscard]]`)
+     */
+    [[nodiscard]] implementation_type get_implementation_type() const noexcept;
+
+    protected:
+    /**
+     * @copydoc plssvm::csvm::get_device_memory
+     */
+    [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_device_memory() const final;
+    /**
+     * @copydoc plssvm::csvm::get_max_mem_alloc_size
+     */
+    [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_max_mem_alloc_size() const final;
+
+    //***************************************************//
+    //                        fit                        //
+    //***************************************************//
+    /**
+     * @copydoc plssvm::csvm::assemble_kernel_matrix
+     */
+    [[nodiscard]] std::vector<::plssvm::detail::move_only_any> assemble_kernel_matrix(solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, real_type QA_cost) const final;
+    /**
+     * @copydoc plssvm::csvm::blas_level_3
+     */
+    void blas_level_3(solver_type solver, real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, real_type beta, soa_matrix<real_type> &C) const final;
+
+    //***************************************************//
+    //                   predict, score                  //
+    //***************************************************//
+    /**
+     * @copydoc plssvm::csvm::predict_values
+     */
+    [[nodiscard]] aos_matrix<real_type> predict_values(const parameter &params, const soa_matrix<real_type> &support_vectors, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, soa_matrix<real_type> &w, const soa_matrix<real_type> &predict_points) const final;
+
+  private:
+    /**
+     * @brief Initializes the hpx backend and performs some sanity checks.
+     * @param[in] target the target platform to use
+     * @throws plssvm::hpx::backend_exception if the requested target is not available
+     * @throws plssvm::hpx::backend_exception if no device for the requested target was found
+     */
+    void init(target_platform target);
+};
+
+}  // namespace hpx
+
+namespace detail {
+
+/**
+ * @brief Sets the `value` to `true` since C-SVMs using the HPX backend are available.
+ */
+template <>
+struct csvm_backend_exists<hpx::csvm> : std::true_type { };
+
+}  // namespace detail
+
+}  // namespace plssvm
+
+#endif  // PLSSVM_BACKENDS_HPX_CSVM_HPP_
diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
new file mode 100644
index 000000000..1dee82838
--- /dev/null
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -0,0 +1,33 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Utility functions specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#define PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
+#pragma once
+
+    #include "boost/atomic/atomic_ref.hpp"  // boost::atomic_ref
+
+#include <string>  // std::string
+
+namespace plssvm::hpx::detail {
+
+using boost::atomic_ref;
+
+/**
+ * @brief Return the version of the HPX backend.
+ * @return the HPX version (`[[nodiscard]]`)
+ */
+// [[nodiscard]] std::string get_hpx_version();
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
diff --git a/include/plssvm/backends/HPX/exceptions.hpp b/include/plssvm/backends/HPX/exceptions.hpp
new file mode 100644
index 000000000..fc7925f24
--- /dev/null
+++ b/include/plssvm/backends/HPX/exceptions.hpp
@@ -0,0 +1,39 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implements custom exception classes specific to the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
+#pragma once
+
+#include "plssvm/exceptions/exceptions.hpp"       // plssvm::exception
+#include "plssvm/exceptions/source_location.hpp"  // plssvm::source_location
+
+#include <string>  // std::string
+
+namespace plssvm::hpx {
+
+/**
+ * @brief Exception type thrown if a problem with the HPX backend occurs.
+ */
+class backend_exception : public exception {
+  public:
+    /**
+     * @brief Construct a new exception forwarding the exception message and source location to plssvm::exception.
+     * @param[in] msg the exception's `what()` message
+     * @param[in] loc the exception's call side information
+     */
+    explicit backend_exception(const std::string &msg, source_location loc = source_location::current());
+};
+
+}  // namespace plssvm::hpx
+
+#endif  // PLSSVM_BACKENDS_HPX_EXCEPTIONS_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
new file mode 100644
index 000000000..9f895a67f
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -0,0 +1,109 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly performing a BLAS GEMM like matrix-matrix multiplication using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp"      // plssvm::{real_type, INTERNAL_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp"  // PLSSVM_ASSERT
+#include "plssvm/matrix.hpp"         // plssvm::soa_matrix
+#include "plssvm/shape.hpp"          // plssvm::shape
+
+#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
+#include <array>      // std::array
+#include <cmath>      // std::ceil
+#include <cstddef>    // std::size_t
+#include <utility>    // std::pair, std::make_pair
+#include <vector>     // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a symmetric matrix (memory optimized), @p B and @p C are matrices, and @p alpha and @p beta are scalars.
+ * @param[in] num_rows the number of rows in @p A and @p C
+ * @param[in] num_rhs the number of columns in @p B and @p C
+ * @param[in] alpha the scalar alpha value
+ * @param[in] A the matrix @p A
+ * @param[in] B the matrix @p B
+ * @param[in] beta the scalar beta value
+ * @param[in,out] C the matrix @p C, also used as result matrix
+ */
+inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const real_type alpha, const std::vector<real_type> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) {
+    PLSSVM_ASSERT(A.size() == (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2, "A matrix sizes mismatch!: {} != {}", A.size(), (num_rows + PADDING_SIZE) * (num_rows + PADDING_SIZE + 1) / 2);
+    PLSSVM_ASSERT(B.shape() == (plssvm::shape{ num_rhs, num_rows }), "B matrix sizes mismatch!: {} != [{}, {}]", B.shape(), num_rhs, num_rows);
+    PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows);
+
+    // calculate constants
+    const auto blocked_num_rhs = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_rhs) / INTERNAL_BLOCK_SIZE));
+    const auto blocked_num_rows = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_rows) / INTERNAL_BLOCK_SIZE));
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_rhs * blocked_num_rows);
+    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i)
+    {
+        range[i] = std::make_pair(i / blocked_num_rows, i % blocked_num_rows);
+    });
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [rhs, row] = idx;
+        const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t dim = 0; dim < num_rows; ++dim) {
+            // perform the dot product calculation
+            for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+                for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+                    const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+                    const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+                    real_type A_val = 0.0;
+                    // determine on which side of the diagonal we are located
+                    if (dim < global_j) {
+                        A_val = A_ptr[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }];
+                    } else {
+                        A_val = A_ptr[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }];
+                    }
+                    temp[internal_i][internal_j] += A_val * B_ptr[dim * (num_rhs + PADDING_SIZE_uz) + global_i];
+                }
+            }
+        }
+
+        // apply the (partial) BLAS operation and update C
+        for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) {
+            for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) {
+                const std::size_t global_i = rhs_idx + static_cast<std::size_t>(internal_i);
+                const std::size_t global_j = row_idx + static_cast<std::size_t>(internal_j);
+
+                // be sure to not perform out of bounds accesses
+                if (global_i < num_rhs && global_j < num_rows) {
+                    C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i];
+                }
+            }
+        }
+    });
+}
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_BLAS_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
new file mode 100644
index 000000000..5db263ce4
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -0,0 +1,118 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for explicitly assembling the kernel matrix using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
+#pragma once
+
+#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix
+
+#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
+#include <array>      // std::array
+#include <cmath>      // std::ceil
+#include <cstddef>    // std::size_t
+#include <utility>    // std::pair, std::make_pair
+#include <vector>     // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Assemble the kernel matrix using the @p kernel function.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] q the `q` vector
+ * @param[out] kernel_matrix the resulting kernel matrix
+ * @param[in] data the data matrix
+ * @param[in] QA_cost he bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_type> &kernel_matrix, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, Args... kernel_function_parameter) {
+    PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+    PLSSVM_ASSERT(kernel_matrix.size() == (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2, "Sizes mismatch (SYMM)!: {} != {}", kernel_matrix.size(), (q.size() + PADDING_SIZE) * (q.size() + PADDING_SIZE + 1) / 2);
+    PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+
+    const std::size_t dept = q.size();
+    const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_features = data.num_cols();
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
+    hpx::experimental::for_loop(hpx::execution::par, 0, blocked_dept * blocked_dept, [&](auto i)
+    {
+        const std::size_t row = i / blocked_dept;
+        const std::size_t col = i % blocked_dept;
+        // only create valid row <-> col index pairs
+        if (row >= col) {
+            range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
+        }
+    })
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [row, col] = idx;
+        const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+        // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t dim = 0; dim < num_features; ++dim) {
+            // perform the feature reduction calculation
+            for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+                for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+                    const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+                    const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+                }
+            }
+        }
+
+        // apply the remaining part of the kernel function and store the value in the output kernel matrix
+        for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+            for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+                // calculate the indices to access the kernel matrix (the part stored on the current device)
+                const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+                const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+                // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+                if (global_row < dept && global_col < dept && global_row >= global_col) {
+                    real_type temp_ij = temp[internal_row][internal_col];
+                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col];
+                    // apply the cost on the diagonal
+                    if (global_row == global_col) {
+                        temp_ij += cost;
+                    }
+                    kernel_matrix_ptr[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij;
+                }
+            }
+        }
+    });
+}
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
new file mode 100644
index 000000000..56a3ef4b9
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -0,0 +1,140 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Functions for performing a matrix-matrix multiplication using an implicit kernel matrix.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
+#pragma once
+
+#include "plssvm/backends/hpx/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp"                                // plssvm::real_type
+#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
+#include "plssvm/detail/operators.hpp"                         // overloaded arithmetic operations for a plssvm::matrix
+#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
+#include "plssvm/kernel_functions.hpp"                         // plssvm::kernel_function
+#include "plssvm/matrix.hpp"                                   // aos_matrix
+
+#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
+#include <array>      // std::array
+#include <cmath>      // std::ceil
+#include <cstddef>    // std::size_t
+#include <utility>    // std::pair, std::make_pair
+#include <vector>     // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar.
+ * @tparam kernel the compile-time kernel function to use
+ * @tparam Args the types of the potential additional arguments for the @p kernel function
+ * @param[in] alpha the scalar alpha value
+ * @param[in] q the `q` vector
+ * @param[in] data the data matrix
+ * @param[in] QA_cost he bottom right matrix entry multiplied by cost
+ * @param[in] cost 1 / the cost parameter in the C-SVM
+ * @param[in] B the matrix @p B
+ * @param[in] beta the beta alpha value
+ * @param[in,out] C the matrix @p C
+ * @param[in] kernel_function_parameter the potential additional arguments for the @p kernel function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_assembly_symm(const real_type alpha, const std::vector<real_type> &q, const soa_matrix<real_type> &data, const real_type QA_cost, const real_type cost, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C, Args... kernel_function_parameter) {
+    PLSSVM_ASSERT(q.size() == data.num_rows() - 1, "Sizes mismatch!: {} != {}", q.size(), data.num_rows() - 1);
+    PLSSVM_ASSERT(cost != real_type{ 0.0 }, "cost must not be 0.0 since it is 1 / plssvm::cost!");
+    PLSSVM_ASSERT(B.shape() == C.shape(), "The matrices B and C must have the same shape!");
+    PLSSVM_ASSERT(B.num_cols() == q.size(), "The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), q.size());
+
+    using namespace operators;
+
+    // alpha * A * B + beta * C
+    C *= beta;
+
+    // calculate constants
+    const std::size_t dept = q.size();
+    const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_features = data.num_cols();
+    const std::size_t num_classes = B.num_rows();
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
+    hpx::experimental::for_loop(hpx::execution::par, 0, blocked_dept * blocked_dept, [&](auto i)
+    {
+        const std::size_t row = i / blocked_dept;
+        const std::size_t col = i % blocked_dept;
+        // only create valid row <-> col index pairs
+        if (row >= col) {
+            range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
+        }
+    }
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [row, col] = idx;
+        const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
+
+        // only calculate the upper triangular matrix -> done be only iterating over valid row <-> col pairs
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t dim = 0; dim < num_features; ++dim) {
+            for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+                for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+                    const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+                    const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+                }
+            }
+        }
+
+        // apply the remaining part of the kernel function and store the value in the output kernel matrix
+        for (unsigned internal_row = 0; internal_row < INTERNAL_BLOCK_SIZE; ++internal_row) {
+            for (unsigned internal_col = 0; internal_col < INTERNAL_BLOCK_SIZE; ++internal_col) {
+                const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
+                const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
+
+                // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
+                if (global_row < dept && global_col < dept && global_row >= global_col) {
+                    real_type temp_ij = temp[internal_row][internal_col];
+                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col];
+                    // apply the cost on the diagonal
+                    if (global_row == global_col) {
+                        temp_ij += cost;
+                        // calculate the values of alpha * A * B
+                        for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+                            atomic_ref<real_type>{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+                        }
+                    } else {
+                        // calculate the values of alpha * A * B
+                        for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
+                            atomic_ref<real_type>{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx];
+                            // symmetry
+                            atomic_ref<real_type>{ C_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+                        }
+                    }
+                }
+            }
+        }
+    });
+}
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/kernel_functions.hpp b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
new file mode 100644
index 000000000..b7be1cb16
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/kernel_functions.hpp
@@ -0,0 +1,159 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implement the different kernel functions for the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
+#pragma once
+
+#include "plssvm/constants.hpp"              // plssvm::real_type
+#include "plssvm/kernel_function_types.hpp"  // plssvm::kernel_function_type
+
+#define PLSSVM_HPX_KERNEL_FUNCTION
+
+#include <cmath>   // std::abs, std::pow, std::exp, std::tanh
+#include <limits>  // std::numeric_limits::min
+
+namespace plssvm::hpx::detail {
+
+//***************************************************//
+//                 feature reductions                //
+//***************************************************//
+
+/**
+ * @brief Compute the default feature reduction, i.e., a simple dot-product.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <kernel_function_type kernel_function>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) {
+    return val1 * val2;
+}
+
+/**
+ * @brief Compute the feature reduction for the radial basis function kernel function, i.e., the squared Euclidean distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::rbf>(const real_type val1, const real_type val2) {
+    const real_type d = val1 - val2;
+    return d * d;
+}
+
+/**
+ * @brief Compute the feature reduction for the laplacian kernel function, i.e., the Manhattan distance.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::laplacian>(const real_type val1, const real_type val2) {
+    return std::abs(val1 - val2);
+}
+
+/**
+ * @brief Compute the feature reduction for the chi-squared kernel function.
+ * @note Be sure that the denominator isn't 0.0 which may be the case for padding values.
+ * @param[in] val1 the first feature value
+ * @param[in] val2 the second feature value
+ * @return the reduced value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type feature_reduce<kernel_function_type::chi_squared>(const real_type val1, const real_type val2) {
+    const real_type d = val1 - val2;
+    return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits<real_type>::min())) * d * d;
+}
+
+//***************************************************//
+//                  kernel functions                 //
+//***************************************************//
+
+/**
+ * @brief Unimplemented base-template for all kernel functions.
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <kernel_function_type, typename... Args>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function(real_type, Args...);
+
+/**
+ * @brief Compute the linear kernel function using @p value.
+ * @param[in] value the value to apply the linear kernel function to
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::linear>(const real_type value) {
+    return value;
+}
+
+/**
+ * @brief Compute the polynomial kernel function using @p value.
+ * @param[in] value the value to apply the polynomial kernel function to
+ * @param[in] degree the degree parameter of the polynomial kernel function
+ * @param[in] gamma the gamma parameter of the polynomial kernel function
+ * @param[in] coef0 the coef0 parameter of the polynomial kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::polynomial>(const real_type value, const int degree, const real_type gamma, const real_type coef0) {
+    return std::pow(gamma * value + coef0, (real_type) degree);
+}
+
+/**
+ * @brief Compute the radial basis function kernel function using @p value.
+ * @param[in] value the value to apply the rbf kernel function to
+ * @param[in] gamma the gamma parameter of the rbf kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::rbf>(const real_type value, const real_type gamma) {
+    return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the sigmoid kernel function using @p value.
+ * @param[in] value the value to apply the sigmoid kernel function to
+ * @param[in] gamma the gamma parameter of the kernel kernel function
+ * @param[in] coef0 the coef0 parameter of the kernel kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::sigmoid>(const real_type value, const real_type gamma, const real_type coef0) {
+    return std::tanh(gamma * value + coef0);
+}
+
+/**
+ * @brief Compute the laplacian function kernel function using @p value.
+ * @param[in] value the value to apply the laplacian kernel function to
+ * @param[in] gamma the gamma parameter of the laplacian kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::laplacian>(const real_type value, const real_type gamma) {
+    return std::exp(-gamma * value);
+}
+
+/**
+ * @brief Compute the chi-squared function kernel function using @p value.
+ * @param[in] value the value to apply the chi-squared kernel function to
+ * @param[in] gamma the gamma parameter of the chi-squared kernel function
+ * @return the result value (`[[nodiscard]]`)
+ */
+template <>
+[[nodiscard]] inline PLSSVM_HPX_KERNEL_FUNCTION real_type apply_kernel_function<kernel_function_type::chi_squared>(const real_type value, const real_type gamma) {
+    return std::exp(-gamma * value);
+}
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_KERNEL_KERNEL_FUNCTIONS_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
new file mode 100644
index 000000000..5182dd11b
--- /dev/null
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -0,0 +1,251 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines the functions used for prediction for the C-SVM using the HPX backend.
+ */
+
+#ifndef PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#define PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
+#pragma once
+
+#include "plssvm/backends/hpx/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/shape.hpp"                                    // plssvm::shape
+
+#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
+#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
+#include <array>      // std::array
+#include <cmath>      // std::fma
+#include <cstddef>    // std::size_t
+#include <utility>    // std::pair, std::make_pair
+#include <vector>     // std::vector
+
+namespace plssvm::hpx::detail {
+
+/**
+ * @brief Calculate the `w` vector used to speedup the prediction using the linear kernel function.
+ * @param[out] w the vector to speedup the linear prediction
+ * @param[in] alpha the previously learned weights
+ * @param[in] support_vectors the support vectors
+ */
+inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<real_type> &alpha, const soa_matrix<real_type> &support_vectors) {
+    PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+    PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }));
+
+    // calculate constants
+    const std::size_t num_features = support_vectors.num_cols();
+    const auto blocked_num_features = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_features) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_classes = alpha.num_rows();
+    const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_classes) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_support_vectors = support_vectors.num_rows();
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_features * blocked_num_classes);
+    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+        range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
+    });
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [feature, c] = idx;
+        const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t sv = 0; sv < num_support_vectors; ++sv) {
+            // perform the feature reduction calculation
+            for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+                for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+                    const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+                    const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+                    temp[internal_feature][internal_class] += alpha_ptr[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * sv_ptr[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv];
+                }
+            }
+        }
+
+        // update global array with local one
+        for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) {
+            for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+                const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
+                const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+                w_ptr[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
+            }
+        }
+    });
+}
+
+/**
+ * @brief Predict the @p predict_points_d using the linear kernel speeding up the calculation using the @p w_d vector.
+ * @param[out] prediction the predicted values
+ * @param[in] w the vector to speedup the calculations
+ * @param[in] rho the previously learned bias
+ * @param[in] predict_points the data points to predict
+ */
+inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, const soa_matrix<real_type> &w, const std::vector<real_type> &rho, const soa_matrix<real_type> &predict_points) {
+    PLSSVM_ASSERT(w.num_rows() == rho.size(), "Size mismatch: {} vs {}!", w.num_rows(), rho.size());
+    PLSSVM_ASSERT(w.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", w.num_cols(), predict_points.num_cols());
+    PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() }));
+
+    // calculate constants
+    const std::size_t num_predict_points = predict_points.num_rows();
+    const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_classes = prediction.num_cols();
+    const auto blocked_num_classes = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_classes) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_features = predict_points.num_cols();
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_classes);
+    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+        range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
+    });
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [pp, c] = idx;
+        const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
+
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t dim = 0; dim < num_features; ++dim) {
+            // perform the feature reduction calculation
+            for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+                for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+                    const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+                    const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+                    temp[internal_pp][internal_class] += w_ptr[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * pp_ptr[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx];
+                }
+            }
+        }
+
+        // perform the dot product calculation
+        for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+            for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) {
+                const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+                const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
+
+                if (global_pp_idx < num_predict_points && global_class_idx < num_classes) {
+                    prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho_ptr[global_class_idx];
+                }
+            }
+        }
+    });
+}
+
+/**
+ * @brief Predict the @p predict_points_d using the @p kernel_function.
+ * @tparam kernel the type of the used kernel function
+ * @tparam Args the types of the parameters necessary for the specific kernel function
+ * @param[out] prediction the predicted values
+ * @param[in] alpha the previously learned weights
+ * @param[in] rho the previously learned bias
+ * @param[in] support_vectors the support vectors
+ * @param[in] predict_points the data points to predict
+ * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function
+ */
+template <kernel_function_type kernel, typename... Args>
+inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_matrix<real_type> &alpha, const std::vector<real_type> &rho, const soa_matrix<real_type> &support_vectors, const soa_matrix<real_type> &predict_points, Args... kernel_function_parameter) {
+    PLSSVM_ASSERT(alpha.num_rows() == rho.size(), "Size mismatch: {} vs {}!", alpha.num_rows(), rho.size());
+    PLSSVM_ASSERT(alpha.num_cols() == support_vectors.num_rows(), "Size mismatch: {} vs {}!", alpha.num_cols(), support_vectors.num_rows());
+    PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "Size mismatch: {} vs {}!", support_vectors.num_cols(), predict_points.num_cols());
+    PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }));
+
+    // calculate constants
+    const std::size_t num_classes = alpha.num_rows();
+    const std::size_t num_support_vectors = support_vectors.num_rows();
+    const auto blocked_num_support_vectors = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_support_vectors) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_predict_points = predict_points.num_rows();
+    const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+    const std::size_t num_features = predict_points.num_cols();
+
+    // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
+    const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
+    const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+
+    // calculate indices over which we parallelize
+    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_support_vectors);
+    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i)
+        range[i] = std::make_pair(i / blocked_num_support_vectors, i % blocked_num_support_vectors);
+    });
+
+    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+        // calculate the indices used in the current thread
+        const auto [pp, sv] = idx;
+        const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
+        const std::size_t sv_idx = sv * INTERNAL_BLOCK_SIZE_uz;
+
+        // create a thread private array used for internal caching
+        std::array<std::array<real_type, INTERNAL_BLOCK_SIZE>, INTERNAL_BLOCK_SIZE> temp{};
+
+        // iterate over all features
+        for (std::size_t dim = 0; dim < num_features; ++dim) {
+            // perform the feature reduction calculation
+            for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+                for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+                    const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+                    const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+                    temp[internal_pp][internal_sv] += detail::feature_reduce<kernel>(sv_ptr[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx],
+                                                                                     pp_ptr[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]);
+                }
+            }
+        }
+
+        // update temp using the respective kernel function
+        for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+            for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+                temp[internal_pp][internal_sv] = detail::apply_kernel_function<kernel>(temp[internal_pp][internal_sv], kernel_function_parameter...);
+            }
+        }
+
+        // add results to prediction
+        for (std::size_t a = 0; a < num_classes; ++a) {
+            for (unsigned internal_pp = 0; internal_pp < INTERNAL_BLOCK_SIZE; ++internal_pp) {
+                for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) {
+                    const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
+                    const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
+
+                    // be sure to not perform out of bounds accesses
+                    if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) {
+                        if (global_sv_idx == 0) {
+                            atomic_ref<real_type>{ prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho_ptr[a];
+                        }
+                        atomic_ref<real_type>{ prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } +=
+                            temp[internal_pp][internal_sv] * alpha_ptr[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx];
+                    }
+                }
+            }
+        }
+    });
+}
+
+}  // namespace plssvm::hpx::detail
+
+#endif  // PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_

From 4a762481d523ed904a1d816737f327ad085af5eb Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:17 +0100
Subject: [PATCH 03/48] Add HPX as backend type

---
 include/plssvm/backend_types.hpp | 15 ++++++++++++++-
 src/plssvm/backend_types.cpp     | 10 +++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/include/plssvm/backend_types.hpp b/include/plssvm/backend_types.hpp
index 7bdbcb9e4..449f5dcdd 100644
--- a/include/plssvm/backend_types.hpp
+++ b/include/plssvm/backend_types.hpp
@@ -2,6 +2,7 @@
  * @file
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -44,7 +45,9 @@ enum class backend_type {
     /** [OpenCL](https://www.khronos.org/opencl/) to target CPUs and GPUs from different vendors. */
     opencl,
     /** [SYCL](https://www.khronos.org/sycl/) to target CPUs and GPUs from different vendors. Currently tested SYCL implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL). */
-    sycl
+    sycl,
+    /** [HPX] (https://hpx.stellar-group.org/) to target CPUs only (currently no GPU support). */
+    hpx
 };
 
 /**
@@ -84,6 +87,7 @@ std::istream &operator>>(std::istream &in, backend_type &backend);
 // Forward declare all possible C-SVMs.
 namespace openmp { class csvm; }
 namespace stdpar { class csvm; }
+namespace hpx { class csvm; }
 namespace cuda { class csvm; }
 namespace hip { class csvm; }
 namespace opencl { class csvm; }
@@ -118,6 +122,15 @@ struct csvm_to_backend_type<stdpar::csvm> {
     constexpr static backend_type value = backend_type::stdpar;
 };
 
+/**
+ * @brief Sets the `value` to `plssvm::backend_type::hpx` for the HPX C-SVM.
+ */
+template <>
+struct csvm_to_backend_type<hpx::csvm> {
+    /// The enum value representing the hpx backend.
+    constexpr static backend_type value = backend_type::hpx;
+};
+
 /**
  * @brief Sets the `value` to `plssvm::backend_type::cuda` for the CUDA C-SVM.
  */
diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp
index 0d01bb837..56da55b8c 100644
--- a/src/plssvm/backend_types.cpp
+++ b/src/plssvm/backend_types.cpp
@@ -1,6 +1,7 @@
 /**
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,9 @@ std::vector<backend_type> list_available_backends() {
 #if defined(PLSSVM_HAS_STDPAR_BACKEND)
     available_backends.push_back(backend_type::stdpar);
 #endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    available_backends.push_back(backend_type::hpx);
+#endif
 #if defined(PLSSVM_HAS_CUDA_BACKEND)
     available_backends.push_back(backend_type::cuda);
 #endif
@@ -61,7 +65,7 @@ backend_type determine_default_backend(const std::vector<backend_type> &availabl
         decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
         decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
         decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } },
-        decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar } }
+        decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar, backend_type::hpx } }
     };
 
     // return the default backend based on the previously defined decision order
@@ -87,6 +91,8 @@ std::ostream &operator<<(std::ostream &out, const backend_type backend) {
             return out << "openmp";
         case backend_type::stdpar:
             return out << "stdpar";
+        case backend_type::hpx:
+            return out << "hpx";
         case backend_type::cuda:
             return out << "cuda";
         case backend_type::hip:
@@ -110,6 +116,8 @@ std::istream &operator>>(std::istream &in, backend_type &backend) {
         backend = backend_type::openmp;
     } else if (str == "stdpar") {
         backend = backend_type::stdpar;
+    } else if (str == "hpx") {
+        backend = backend_type::hpx;
     } else if (str == "cuda") {
         backend = backend_type::cuda;
     } else if (str == "hip") {

From 0902c048983a613ccbc7953d14f5ff72a9843f59 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 04/48] Initial src for HPX backend

---
 src/plssvm/backends/HPX/CMakeLists.txt     |  63 ++++++
 src/plssvm/backends/HPX/csvm.cpp           | 233 +++++++++++++++++++++
 src/plssvm/backends/HPX/detail/utility.cpp |  26 +++
 src/plssvm/backends/HPX/exceptions.cpp     |  22 ++
 4 files changed, 344 insertions(+)
 create mode 100644 src/plssvm/backends/HPX/CMakeLists.txt
 create mode 100644 src/plssvm/backends/HPX/csvm.cpp
 create mode 100644 src/plssvm/backends/HPX/detail/utility.cpp
 create mode 100644 src/plssvm/backends/HPX/exceptions.cpp

diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
new file mode 100644
index 000000000..c7c8f0274
--- /dev/null
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -0,0 +1,63 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+##          See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+list(APPEND CMAKE_MESSAGE_INDENT "HPX:  ")
+
+# check if HPX can be enabled
+message(CHECK_START "Checking for HPX backend")
+
+find_package(HPX)
+
+if (NOT HPX_FOUND)
+    message(CHECK_FAIL "not found")
+    if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+        message(SEND_ERROR "Cannot find requested backend: HPX!")
+    endif ()
+    return()
+else ()
+    if (NOT DEFINED PLSSVM_CPU_TARGET_ARCHS)
+        if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+            message(SEND_ERROR "Found requested HPX backend, but no \"cpu\" targets were specified!")
+        else ()
+            message(STATUS "Found HPX backend, but no \"cpu\" targets were specified!")
+        endif ()
+        message(CHECK_FAIL "skipped")
+        return()
+    endif ()
+endif ()
+message(CHECK_PASS "found ")
+
+# explicitly set sources
+set(PLSSVM_HPX_SOURCES
+    ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp
+)
+
+# set target properties
+set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
+add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} HPX::hpx HPX::wrap_main HPX::iostreams_component)
+
+# additional compilation flags
+target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
+
+# link base library against HPX library
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME})
+
+# set compile definition that the HPX backend is available
+target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_HAS_HPX_BACKEND)
+
+# link against interface library
+target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# mark backend library as install target
+append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+
+# generate summary string
+set(PLSSVM_HPX_BACKEND_SUMMARY_STRING " - HPX: cpu " PARENT_SCOPE)
+
+list(POP_BACK CMAKE_MESSAGE_INDENT)
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
new file mode 100644
index 000000000..c1af0348b
--- /dev/null
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -0,0 +1,233 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/hpx/csvm.hpp"
+
+#include "plssvm/backends/hpx/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::detail::device_kernel_symm
+#include "plssvm/backends/hpx/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::detail::device_kernel_assembly
+#include "plssvm/backends/hpx/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::detail::device_kernel_assembly_symm
+#include "plssvm/backends/hpx/kernel/predict_kernel.hpp"                           // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
+#include "plssvm/constants.hpp"                                                       // plssvm::real_type
+#include "plssvm/csvm.hpp"                                                            // plssvm::csvm
+#include "plssvm/detail/assert.hpp"                                                   // PLSSVM_ASSERT
+#include "plssvm/detail/data_distribution.hpp"                                        // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
+#include "plssvm/detail/memory_size.hpp"                                              // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp"                                            // plssvm::detail::{move_only_any, move_only_any_cast}
+#include "plssvm/detail/utility.hpp"                                                  // plssvm::detail::{get_system_memory, unreachable}
+#include "plssvm/kernel_function_types.hpp"                                           // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                                          // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/parameter.hpp"                                                       // plssvm::parameter
+#include "plssvm/shape.hpp"                                                           // plssvm::shape
+#include "plssvm/solver_types.hpp"                                                    // plssvm::solver_type
+#include "plssvm/target_platforms.hpp"                                                // plssvm::target_platform
+
+#include <cstddef>  // std::size_t
+#include <tuple>    // std::tuple, std::make_tuple
+#include <utility>  // std::move
+#include <vector>   // std::vector
+
+namespace plssvm::hpx {
+
+csvm::csvm(parameter params) :
+    csvm{ plssvm::target_platform::automatic, params } { }
+
+csvm::csvm(const target_platform target, parameter params) :
+    ::plssvm::csvm{ params } {
+    this->init(target);
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const {
+    return { ::plssvm::detail::get_system_memory() };
+}
+
+std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const {
+    return this->get_device_memory();
+}
+
+//***************************************************//
+//                        fit                        //
+//***************************************************//
+
+std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const solver_type solver, const parameter &params, const soa_matrix<real_type> &A, const std::vector<real_type> &q_red, const real_type QA_cost) const {
+    PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+    PLSSVM_ASSERT(!A.empty(), "The matrix to setup on the devices must not be empty!");
+    PLSSVM_ASSERT(A.is_padded(), "The matrix to setup on the devices must be padded!");
+    PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+    PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
+
+    std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices());
+    const real_type cost = real_type{ 1.0 } / params.cost;
+
+    switch (solver) {
+        case solver_type::automatic:
+            // unreachable
+            break;
+        case solver_type::cg_explicit:
+            {
+                const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() };
+                std::vector<real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0));  // only explicitly store the upper triangular matrix
+                switch (params.kernel_type) {
+                    case kernel_function_type::linear:
+                        detail::device_kernel_assembly<kernel_function_type::linear>(q_red, kernel_matrix, A, QA_cost, cost);
+                        break;
+                    case kernel_function_type::polynomial:
+                        detail::device_kernel_assembly<kernel_function_type::polynomial>(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get<real_type>(params.gamma), params.coef0);
+                        break;
+                    case kernel_function_type::rbf:
+                        detail::device_kernel_assembly<kernel_function_type::rbf>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                        break;
+                    case kernel_function_type::sigmoid:
+                        detail::device_kernel_assembly<kernel_function_type::sigmoid>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma), params.coef0);
+                        break;
+                    case kernel_function_type::laplacian:
+                        detail::device_kernel_assembly<kernel_function_type::laplacian>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                        break;
+                    case kernel_function_type::chi_squared:
+                        detail::device_kernel_assembly<kernel_function_type::chi_squared>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                        break;
+                }
+
+                kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) };
+            }
+            break;
+        case solver_type::cg_implicit:
+            {
+                // simply return data since in implicit we don't assembly the kernel matrix here!
+                kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) };
+            }
+            break;
+    }
+
+    return kernel_matrices_parts;
+}
+
+void csvm::blas_level_3(const solver_type solver, const real_type alpha, const std::vector<::plssvm::detail::move_only_any> &A, const soa_matrix<real_type> &B, const real_type beta, soa_matrix<real_type> &C) const {
+    PLSSVM_ASSERT(solver != solver_type::automatic, "An explicit solver type must be provided instead of solver_type::automatic!");
+    PLSSVM_ASSERT(A.size() == 1, "Not enough kernel matrix parts ({}) for the available number of devices (1)!", A.size());
+    PLSSVM_ASSERT(!B.empty(), "The B matrix must not be empty!");
+    PLSSVM_ASSERT(B.is_padded(), "The B matrix must be padded!");
+    PLSSVM_ASSERT(!C.empty(), "The C matrix must not be empty!");
+    PLSSVM_ASSERT(C.is_padded(), "The C matrix must be padded!");
+    PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape());
+    PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
+
+    switch (solver) {
+        case solver_type::automatic:
+            // unreachable
+            break;
+        case solver_type::cg_explicit:
+            {
+                const std::size_t num_rhs = B.shape().x;
+                const std::size_t num_rows = B.shape().y;
+
+                const auto &explicit_A = ::plssvm::detail::move_only_any_cast<const std::vector<real_type> &>(A.front());
+                PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!");
+
+                detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C);
+            }
+            break;
+        case solver_type::cg_implicit:
+            {
+                const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast<const std::tuple<soa_matrix<real_type>, parameter, std::vector<real_type>, real_type> &>(A.front());
+                PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!");
+                PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+                const real_type cost = real_type{ 1.0 } / params.cost;
+
+                switch (params.kernel_type) {
+                    case kernel_function_type::linear:
+                        detail::device_kernel_assembly_symm<kernel_function_type::linear>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C);
+                        break;
+                    case kernel_function_type::polynomial:
+                        detail::device_kernel_assembly_symm<kernel_function_type::polynomial>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get<real_type>(params.gamma), params.coef0);
+                        break;
+                    case kernel_function_type::rbf:
+                        detail::device_kernel_assembly_symm<kernel_function_type::rbf>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                        break;
+                    case kernel_function_type::sigmoid:
+                        detail::device_kernel_assembly_symm<kernel_function_type::sigmoid>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma), params.coef0);
+                        break;
+                    case kernel_function_type::laplacian:
+                        detail::device_kernel_assembly_symm<kernel_function_type::laplacian>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                        break;
+                    case kernel_function_type::chi_squared:
+                        detail::device_kernel_assembly_symm<kernel_function_type::chi_squared>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                        break;
+                }
+            }
+            break;
+    }
+}
+
+//***************************************************//
+//                   predict, score                  //
+//***************************************************//
+
+aos_matrix<real_type> csvm::predict_values(const parameter &params,
+                                           const soa_matrix<real_type> &support_vectors,
+                                           const aos_matrix<real_type> &alpha,
+                                           const std::vector<real_type> &rho,
+                                           soa_matrix<real_type> &w,
+                                           const soa_matrix<real_type> &predict_points) const {
+    PLSSVM_ASSERT(!support_vectors.empty(), "The support vectors must not be empty!");
+    PLSSVM_ASSERT(support_vectors.is_padded(), "The support vectors must be padded!");
+    PLSSVM_ASSERT(!alpha.empty(), "The alpha vectors (weights) must not be empty!");
+    PLSSVM_ASSERT(alpha.is_padded(), "The alpha vectors (weights) must be padded!");
+    PLSSVM_ASSERT(support_vectors.num_rows() == alpha.num_cols(), "The number of support vectors ({}) and number of weights ({}) must be the same!", support_vectors.num_rows(), alpha.num_cols());
+    PLSSVM_ASSERT(rho.size() == alpha.num_rows(), "The number of rho values ({}) and the number of weight vectors ({}) must be the same!", rho.size(), alpha.num_rows());
+    PLSSVM_ASSERT(w.empty() || w.is_padded(), "Either w must be empty or must be padded!");
+    PLSSVM_ASSERT(w.empty() || support_vectors.num_cols() == w.num_cols(), "Either w must be empty or contain exactly the same number of values ({}) as features are present ({})!", w.num_cols(), support_vectors.num_cols());
+    PLSSVM_ASSERT(w.empty() || alpha.num_rows() == w.num_rows(), "Either w must be empty or contain exactly the same number of vectors ({}) as the alpha vector ({})!", w.num_rows(), alpha.num_rows());
+    PLSSVM_ASSERT(!predict_points.empty(), "The data points to predict must not be empty!");
+    PLSSVM_ASSERT(predict_points.is_padded(), "The data points to predict must be padded!");
+    PLSSVM_ASSERT(support_vectors.num_cols() == predict_points.num_cols(), "The number of features in the support vectors ({}) must be the same as in the data points to predict ({})!", support_vectors.num_cols(), predict_points.num_cols());
+
+    // defined sizes
+    const std::size_t num_classes = alpha.num_rows();
+    const std::size_t num_predict_points = predict_points.num_rows();
+    const std::size_t num_features = predict_points.num_cols();
+
+    // num_predict_points x num_classes
+    aos_matrix<real_type> out{ plssvm::shape{ num_predict_points, num_classes }, real_type{ 0.0 }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
+
+    if (params.kernel_type == kernel_function_type::linear) {
+        // special optimization for the linear kernel function
+        if (w.empty()) {
+            // fill w vector
+            w = soa_matrix<real_type>{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
+            detail::device_kernel_w_linear(w, alpha, support_vectors);
+        }
+    }
+
+    // call the predict kernels
+    switch (params.kernel_type) {
+        case kernel_function_type::linear:
+            // predict the values using the w vector
+            detail::device_kernel_predict_linear(out, w, rho, predict_points);
+            break;
+        case kernel_function_type::polynomial:
+            detail::device_kernel_predict<kernel_function_type::polynomial>(out, alpha, rho, support_vectors, predict_points, params.degree, std::get<real_type>(params.gamma), params.coef0);
+            break;
+        case kernel_function_type::rbf:
+            detail::device_kernel_predict<kernel_function_type::rbf>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+            break;
+        case kernel_function_type::sigmoid:
+            detail::device_kernel_predict<kernel_function_type::sigmoid>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma), params.coef0);
+            break;
+        case kernel_function_type::laplacian:
+            detail::device_kernel_predict<kernel_function_type::laplacian>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+            break;
+        case kernel_function_type::chi_squared:
+            detail::device_kernel_predict<kernel_function_type::chi_squared>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+            break;
+    }
+
+    return out;
+}
+
+}  // namespace plssvm::hpx
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
new file mode 100644
index 000000000..26c9dc3af
--- /dev/null
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -0,0 +1,26 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/hpx/detail/utility.hpp"
+
+#include "plssvm/detail/string_utility.hpp"  // plssvm::detail::as_lower_case
+#include "plssvm/detail/utility.hpp"         // ::plssvm::detail::contains
+#include "plssvm/target_platforms.hpp"       // plssvm::target_platforms
+
+#include "fmt/format.h"  // fmt::format
+
+#include <string>  // std::string
+
+namespace plssvm::hpx::detail {
+
+std::string get_hpx_version() {
+    return "unknown";
+}
+
+}  // namespace plssvm::hpx::detail
diff --git a/src/plssvm/backends/HPX/exceptions.cpp b/src/plssvm/backends/HPX/exceptions.cpp
new file mode 100644
index 000000000..73b51b7fa
--- /dev/null
+++ b/src/plssvm/backends/HPX/exceptions.cpp
@@ -0,0 +1,22 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/hpx/exceptions.hpp"
+
+#include "plssvm/exceptions/exceptions.hpp"       // plssvm::exception
+#include "plssvm/exceptions/source_location.hpp"  // plssvm::source_location
+
+#include <string>  // std::string
+
+namespace plssvm::hpx {
+
+backend_exception::backend_exception(const std::string &msg, source_location loc) :
+    ::plssvm::exception{ msg, "hpx::backend_exception", loc } { }
+
+}  // namespace plssvm::hpx

From 0e3bb1445f2d93bbb67303a77c41c6ea18e5503b Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 05/48] Make HPX backend compile fixes

---
 CMakeLists.txt                                | 13 +++++++-
 include/plssvm/backends/HPX/csvm.hpp          |  7 ----
 .../plssvm/backends/HPX/detail/utility.hpp    | 10 ++++--
 include/plssvm/csvm_factory.hpp               |  5 +++
 src/plssvm/backends/HPX/CMakeLists.txt        |  2 +-
 src/plssvm/backends/HPX/csvm.cpp              | 32 ++++++++++++++++---
 src/plssvm/backends/HPX/detail/utility.cpp    |  7 +++-
 src/plssvm/backends/HPX/exceptions.cpp        |  2 +-
 8 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ad3f3de8..099dac055 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
 ## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
 ## License: This file is part of the PLSSVM project which is released under the MIT license.
 ##          See the LICENSE.md file in the project root for full license information.
@@ -376,6 +376,13 @@ if (PLSSVM_ENABLE_STDPAR_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_STDPAR_BACKEND)
     add_subdirectory(src/plssvm/backends/stdpar)
 endif ()
 
+## check for HPX backend
+set(PLSSVM_ENABLE_HPX_BACKEND AUTO CACHE STRING "Enable HPX Backend")
+set_property(CACHE PLSSVM_ENABLE_HPX_BACKEND PROPERTY STRINGS AUTO ON OFF)
+if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_HPX_BACKEND)
+    add_subdirectory(src/plssvm/backends/HPX)
+endif ()
+
 ## check for CUDA backend
 set(PLSSVM_ENABLE_CUDA_BACKEND AUTO CACHE STRING "Enable CUDA Backend")
 set_property(CACHE PLSSVM_ENABLE_CUDA_BACKEND PROPERTY STRINGS AUTO ON OFF)
@@ -705,6 +712,10 @@ if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
     message(STATUS "${PLSSVM_STDPAR_BACKEND_SUMMARY_STRING}")
     list(APPEND PLSSVM_BACKEND_NAME_LIST "stdpar")
 endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+    message(STATUS "${PLSSVM_HPX_BACKEND_SUMMARY_STRING}")
+    list(APPEND PLSSVM_BACKEND_NAME_LIST "hpx")
+endif ()
 if (TARGET ${PLSSVM_CUDA_BACKEND_LIBRARY_NAME})
     message(STATUS "${PLSSVM_CUDA_BACKEND_SUMMARY_STRING}")
     list(APPEND PLSSVM_BACKEND_NAME_LIST "cuda")
diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp
index a2867ac1e..0be0dc86a 100644
--- a/include/plssvm/backends/HPX/csvm.hpp
+++ b/include/plssvm/backends/HPX/csvm.hpp
@@ -14,7 +14,6 @@
 #define PLSSVM_BACKENDS_HPX_CSVM_HPP_
 #pragma once
 
-#include "plssvm/backends/hpx/implementation_types.hpp"  // plssvm::hpx::implementation_type
 #include "plssvm/constants.hpp"                             // plssvm::real_type
 #include "plssvm/csvm.hpp"                                  // plssvm::csvm, plssvm::detail::csvm_backend_exists
 #include "plssvm/detail/memory_size.hpp"                    // plssvm::detail::memory_size
@@ -114,12 +113,6 @@ class csvm : public ::plssvm::csvm {
         return 1;
     }
 
-    /**
-     * @brief Return the hpx implementation type.
-     * @return the hpx implementation type (`[[nodiscard]]`)
-     */
-    [[nodiscard]] implementation_type get_implementation_type() const noexcept;
-
     protected:
     /**
      * @copydoc plssvm::csvm::get_device_memory
diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index 1dee82838..120a8a4de 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -23,10 +23,16 @@ namespace plssvm::hpx::detail {
 using boost::atomic_ref;
 
 /**
- * @brief Return the version of the HPX backend.
+ * @brief Return the number of used CPU threads in the HPX backend.
+ * @return the number of used CPU threads (`[[nodiscard]]`)
+ */
+[[nodiscard]] int get_num_threads();
+
+/**
+ * @brief Return the HPX version used.
  * @return the HPX version (`[[nodiscard]]`)
  */
-// [[nodiscard]] std::string get_hpx_version();
+[[nodiscard]] std::string get_hpx_version();
 
 }  // namespace plssvm::hpx::detail
 
diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp
index 01a2769ec..a1272a5e0 100644
--- a/include/plssvm/csvm_factory.hpp
+++ b/include/plssvm/csvm_factory.hpp
@@ -28,6 +28,9 @@
 #if defined(PLSSVM_HAS_STDPAR_BACKEND)
     #include "plssvm/backends/stdpar/csvm.hpp"  // plssvm::stdpar::csvm, plssvm::csvm_backend_exists_v
 #endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    #include "plssvm/backends/HPX/csvm.hpp"  // plssvm::hpx::csvm, plssvm::csvm_backend_exists_v
+#endif
 #if defined(PLSSVM_HAS_CUDA_BACKEND)
     #include "plssvm/backends/CUDA/csvm.hpp"  // plssvm::cuda::csvm, plssvm::csvm_backend_exists_v
 #endif
@@ -130,6 +133,8 @@ template <typename... Args>
             return make_csvm_default_impl<openmp::csvm>(std::forward<Args>(args)...);
         case backend_type::stdpar:
             return make_csvm_default_impl<stdpar::csvm>(std::forward<Args>(args)...);
+        case backend_type::hpx:
+            return make_csvm_default_impl<hpx::csvm>(std::forward<Args>(args)...);
         case backend_type::cuda:
             return make_csvm_default_impl<cuda::csvm>(std::forward<Args>(args)...);
         case backend_type::hip:
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index c7c8f0274..16b3691e5 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -40,7 +40,7 @@ set(PLSSVM_HPX_SOURCES
 # set target properties
 set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
 add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
-target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} HPX::hpx HPX::wrap_main HPX::iostreams_component)
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main HPX::iostreams_component)
 
 # additional compilation flags
 target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
index c1af0348b..fba56e333 100644
--- a/src/plssvm/backends/HPX/csvm.cpp
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -7,12 +7,12 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "plssvm/backends/hpx/csvm.hpp"
+#include "plssvm/backends/HPX/csvm.hpp"
 
-#include "plssvm/backends/hpx/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::detail::device_kernel_symm
-#include "plssvm/backends/hpx/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::detail::device_kernel_assembly
-#include "plssvm/backends/hpx/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::detail::device_kernel_assembly_symm
-#include "plssvm/backends/hpx/kernel/predict_kernel.hpp"                           // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
+#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::detail::device_kernel_symm
+#include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::detail::device_kernel_assembly
+#include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::detail::device_kernel_assembly_symm
+#include "plssvm/backends/HPX/kernel/predict_kernel.hpp"                           // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
 #include "plssvm/constants.hpp"                                                       // plssvm::real_type
 #include "plssvm/csvm.hpp"                                                            // plssvm::csvm
 #include "plssvm/detail/assert.hpp"                                                   // PLSSVM_ASSERT
@@ -20,6 +20,7 @@
 #include "plssvm/detail/memory_size.hpp"                                              // plssvm::detail::memory_size
 #include "plssvm/detail/move_only_any.hpp"                                            // plssvm::detail::{move_only_any, move_only_any_cast}
 #include "plssvm/detail/utility.hpp"                                                  // plssvm::detail::{get_system_memory, unreachable}
+#include "plssvm/backends/HPX/exceptions.hpp"                                         // plssvm::hpx::backend_exception
 #include "plssvm/kernel_function_types.hpp"                                           // plssvm::kernel_function_type
 #include "plssvm/matrix.hpp"                                                          // plssvm::aos_matrix, plssvm::soa_matrix
 #include "plssvm/parameter.hpp"                                                       // plssvm::parameter
@@ -42,6 +43,27 @@ csvm::csvm(const target_platform target, parameter params) :
     this->init(target);
 }
 
+void csvm::init(const target_platform target) {
+    // check if supported target platform has been selected
+    if (target != target_platform::automatic && target != target_platform::cpu) {
+        throw backend_exception{ fmt::format("Invalid target platform '{}' for the HPX backend!", target) };
+    }
+    // the CPU target must be available
+#if !defined(PLSSVM_HAS_CPU_TARGET)
+    throw backend_exception{ "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!" };
+#endif
+
+    plssvm::detail::log(verbosity_level::full,
+                        "\nUsing HPX ({}) as backend with {} thread(s).\n\n",
+                        plssvm::detail::tracking::tracking_entry{ "dependencies", "hpx_version", detail::get_hpx_version() },
+                        plssvm::detail::tracking::tracking_entry{ "backend", "num_threads", detail::get_num_threads() });
+    PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::hpx }));
+    PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", plssvm::target_platform::cpu }));
+
+    // update the target platform
+    target_ = plssvm::target_platform::cpu;
+}
+
 std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const {
     return { ::plssvm::detail::get_system_memory() };
 }
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index 26c9dc3af..8306d5e48 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -7,7 +7,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "plssvm/backends/hpx/detail/utility.hpp"
+#include "plssvm/backends/HPX/detail/utility.hpp"
 
 #include "plssvm/detail/string_utility.hpp"  // plssvm::detail::as_lower_case
 #include "plssvm/detail/utility.hpp"         // ::plssvm::detail::contains
@@ -23,4 +23,9 @@ std::string get_hpx_version() {
     return "unknown";
 }
 
+int get_num_threads() {
+    // get the number of used HPX threads
+    int num_hpx_threads{-1};
+    return num_hpx_threads;
+}
 }  // namespace plssvm::hpx::detail
diff --git a/src/plssvm/backends/HPX/exceptions.cpp b/src/plssvm/backends/HPX/exceptions.cpp
index 73b51b7fa..8b1da9124 100644
--- a/src/plssvm/backends/HPX/exceptions.cpp
+++ b/src/plssvm/backends/HPX/exceptions.cpp
@@ -7,7 +7,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "plssvm/backends/hpx/exceptions.hpp"
+#include "plssvm/backends/HPX/exceptions.hpp"
 
 #include "plssvm/exceptions/exceptions.hpp"       // plssvm::exception
 #include "plssvm/exceptions/source_location.hpp"  // plssvm::source_location

From b68864e3c25d1f84b318662ead075751521e1631 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 06/48] Training working with start/stop

---
 .../backends/HPX/kernel/cg_explicit/blas.hpp  |  9 +++---
 .../cg_explicit/kernel_matrix_assembly.hpp    | 11 ++++----
 .../kernel_matrix_assembly_blas.hpp           | 12 ++++----
 .../backends/HPX/kernel/predict_kernel.hpp    | 28 +++++++++----------
 src/main_train.cpp                            | 15 +++++++++-
 5 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
index 9f895a67f..19dc567b3 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -18,7 +18,6 @@
 #include "plssvm/detail/assert.hpp"  // PLSSVM_ASSERT
 #include "plssvm/matrix.hpp"         // plssvm::soa_matrix
 #include "plssvm/shape.hpp"          // plssvm::shape
-
 #include <hpx/execution.hpp>                        // hpx::execution::par_unseq
 #include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
 #include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
@@ -55,12 +54,12 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_rhs * blocked_num_rows);
-    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i)
+    ::hpx::threads::run_as_hpx_thread([blocked_num_rows, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i)
     {
         range[i] = std::make_pair(i / blocked_num_rows, i % blocked_num_rows);
-    });
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [rhs, row] = idx;
         const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
@@ -101,7 +100,7 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
                 }
             }
         }
-    });
+    });});
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index 5db263ce4..ba4cda9e4 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -14,7 +14,7 @@
 #define PLSSVM_BACKENDS_HPX_KERNEL_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_
 #pragma once
 
-#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
 #include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
 #include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
 #include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
@@ -58,17 +58,16 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    hpx::experimental::for_loop(hpx::execution::par, 0, blocked_dept * blocked_dept, [&](auto i)
-    {
+    ::hpx::threads::run_as_hpx_thread([blocked_dept, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i){
         const std::size_t row = i / blocked_dept;
         const std::size_t col = i % blocked_dept;
         // only create valid row <-> col index pairs
         if (row >= col) {
             range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
         }
-    })
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -110,7 +109,7 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
                 }
             }
         }
-    });
+    });});
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 56a3ef4b9..5f433d614 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -14,8 +14,8 @@
 #define PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
 #pragma once
 
-#include "plssvm/backends/hpx/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
-#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/backends/HPX/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
 #include "plssvm/constants.hpp"                                // plssvm::real_type
 #include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
 #include "plssvm/detail/operators.hpp"                         // overloaded arithmetic operations for a plssvm::matrix
@@ -72,7 +72,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    hpx::experimental::for_loop(hpx::execution::par, 0, blocked_dept * blocked_dept, [&](auto i)
+    ::hpx::threads::run_as_hpx_thread([blocked_dept, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i)
     {
         const std::size_t row = i / blocked_dept;
         const std::size_t col = i % blocked_dept;
@@ -80,9 +80,9 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
         if (row >= col) {
             range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
         }
-    }
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+     ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -132,7 +132,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
                 }
             }
         }
-    });
+    });});
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
index 5182dd11b..28e44ba23 100644
--- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -14,8 +14,8 @@
 #define PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
 #pragma once
 
-#include "plssvm/backends/hpx/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
-#include "plssvm/backends/hpx/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/backends/HPX/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
 #include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
 #include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
 #include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
@@ -56,11 +56,11 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_features * blocked_num_classes);
-    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+    ::hpx::threads::run_as_hpx_thread([blocked_num_classes, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
+     ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [feature, c] = idx;
         const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
@@ -91,7 +91,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
                 w_ptr[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
             }
         }
-    });
+    });});
 }
 
 /**
@@ -119,11 +119,11 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_classes);
-    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+    ::hpx::threads::run_as_hpx_thread([blocked_num_classes, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, c] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -156,7 +156,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
                 }
             }
         }
-    });
+    });});
 }
 
 /**
@@ -191,11 +191,11 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_support_vectors);
-    hpx::experimental::for_loop(hpx::execution::par_unseq, 0, range.size(), [&](auto i)
+    ::hpx::threads::run_as_hpx_thread([blocked_num_support_vectors, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_support_vectors, i % blocked_num_support_vectors);
-    });
+    });});
 
-    hpx::for_each(hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, sv] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -243,7 +243,7 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
                 }
             }
         }
-    });
+    });});
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 7f99409c7..5681d84e2 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -16,6 +16,10 @@
                                                            // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME
 #include "plssvm/detail/utility.hpp"                       // PLSSVM_IS_DEFINED
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+#include <hpx/hpx_start.hpp> 
+#endif
+
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
@@ -53,6 +57,11 @@ int main(int argc, char *argv[]) {
         // parse SVM parameter from command line
         plssvm::detail::cmd::parser_train cmd_parser{ argc, argv };
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+        // Initialize HPX, run hpx_main.
+        hpx::start(argc, argv);
+#endif
+
         // send warning if the build type is release and assertions are enabled
         if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) {
             plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning,
@@ -116,6 +125,10 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
-
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    // Wait for hpx::finalize being called.
+    return hpx::stop();
+#else
     return EXIT_SUCCESS;
+#endif
 }

From c57ff02a927e2247dafdad9dd1f56f91919c583d Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 07/48] Add HPX runtime init to predict

---
 src/main_predict.cpp | 15 ++++++++++++++-
 src/main_train.cpp   |  7 ++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 079b6ca00..90ae575a9 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -17,6 +17,10 @@
                                                            // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME
 #include "plssvm/detail/utility.hpp"                       // PLSSVM_IS_DEFINED
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+#include <hpx/hpx_start.hpp> 
+#endif
+
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
@@ -56,6 +60,10 @@ int main(int argc, char *argv[]) {
         // parse SVM parameter from command line
         const plssvm::detail::cmd::parser_predict cmd_parser{ argc, argv };
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+       // Initialize HPX, don't run hpx_main
+        hpx::start(nullptr, argc, argv); 
+#endif
         // send warning if the build type is release and assertions are enabled
         if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) {
             plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning,
@@ -175,6 +183,11 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
-
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    // TODO: hpx::finalize has to be called from the HPX runtime before hpx::stop
+    // hpx::post([]() { hpx::finalize(); });
+    return hpx::stop();
+#else
     return EXIT_SUCCESS;
+#endif
 }
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 5681d84e2..33e2b5197 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -58,8 +58,8 @@ int main(int argc, char *argv[]) {
         plssvm::detail::cmd::parser_train cmd_parser{ argc, argv };
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-        // Initialize HPX, run hpx_main.
-        hpx::start(argc, argv);
+       // Initialize HPX, don't run hpx_main
+        hpx::start(nullptr, argc, argv); 
 #endif
 
         // send warning if the build type is release and assertions are enabled
@@ -126,7 +126,8 @@ int main(int argc, char *argv[]) {
         return EXIT_FAILURE;
     }
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    // Wait for hpx::finalize being called.
+    // TODO: hpx::finalize has to be called from the HPX runtime before hpx::stop
+    // hpx::post([]() { hpx::finalize(); });
     return hpx::stop();
 #else
     return EXIT_SUCCESS;

From c9e38ccd93536535302e766086a25df0b411a04c Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 08/48] Add some TODO for convencience functions

---
 src/plssvm/backends/HPX/detail/utility.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index 8306d5e48..7782b379c 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -19,10 +19,12 @@
 
 namespace plssvm::hpx::detail {
 
+// TODO: implement function
 std::string get_hpx_version() {
     return "unknown";
 }
 
+// TODO: implement function
 int get_num_threads() {
     // get the number of used HPX threads
     int num_hpx_threads{-1};

From 657053f53a5da44ee7413079d1820ceb7c098ee4 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 09/48] Remove ::run_as_hpx_thread()

---
 .../backends/HPX/kernel/cg_explicit/blas.hpp  |  8 +++----
 .../cg_explicit/kernel_matrix_assembly.hpp    |  8 +++----
 .../kernel_matrix_assembly_blas.hpp           |  8 +++----
 .../backends/HPX/kernel/predict_kernel.hpp    | 24 +++++++++----------
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
index 19dc567b3..68bc0b239 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -54,12 +54,12 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_rhs * blocked_num_rows);
-    ::hpx::threads::run_as_hpx_thread([blocked_num_rows, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i)
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i)
     {
         range[i] = std::make_pair(i / blocked_num_rows, i % blocked_num_rows);
-    });});
+    });
 
-    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [rhs, row] = idx;
         const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
@@ -100,7 +100,7 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
                 }
             }
         }
-    });});
+    });
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index ba4cda9e4..eab768662 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -58,16 +58,16 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    ::hpx::threads::run_as_hpx_thread([blocked_dept, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i){
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i){
         const std::size_t row = i / blocked_dept;
         const std::size_t col = i % blocked_dept;
         // only create valid row <-> col index pairs
         if (row >= col) {
             range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
         }
-    });});
+    });
 
-    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -109,7 +109,7 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
                 }
             }
         }
-    });});
+    });
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 5f433d614..5db6035a4 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -72,7 +72,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    ::hpx::threads::run_as_hpx_thread([blocked_dept, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i)
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i)
     {
         const std::size_t row = i / blocked_dept;
         const std::size_t col = i % blocked_dept;
@@ -80,9 +80,9 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
         if (row >= col) {
             range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
         }
-    });});
+    });
 
-     ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -132,7 +132,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
                 }
             }
         }
-    });});
+    });
 }
 
 }  // namespace plssvm::hpx::detail
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
index 28e44ba23..5da57b09d 100644
--- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -56,11 +56,11 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_features * blocked_num_classes);
-    ::hpx::threads::run_as_hpx_thread([blocked_num_classes, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });});
+    });
 
-     ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [feature, c] = idx;
         const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
@@ -91,7 +91,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
                 w_ptr[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
             }
         }
-    });});
+    });
 }
 
 /**
@@ -119,11 +119,11 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_classes);
-    ::hpx::threads::run_as_hpx_thread([blocked_num_classes, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });});
+    });
 
-    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, c] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -156,7 +156,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
                 }
             }
         }
-    });});
+    });
 }
 
 /**
@@ -191,11 +191,11 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
 
     // calculate indices over which we parallelize
     std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_support_vectors);
-    ::hpx::threads::run_as_hpx_thread([blocked_num_support_vectors, &range](){::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
+    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
         range[i] = std::make_pair(i / blocked_num_support_vectors, i % blocked_num_support_vectors);
-    });});
+    });
 
-    ::hpx::threads::run_as_hpx_thread([&](){::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, sv] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -243,7 +243,7 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
                 }
             }
         }
-    });});
+    });
 }
 
 }  // namespace plssvm::hpx::detail

From 4b7138e84a57a503a88f324dc51504af5e6d3855 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:18 +0100
Subject: [PATCH 10/48] Add HPX thread return

---
 src/main_train.cpp                         | 54 +++++++++++++++-------
 src/plssvm/backends/HPX/csvm.cpp           | 16 +++++--
 src/plssvm/backends/HPX/detail/utility.cpp |  5 +-
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/src/main_train.cpp b/src/main_train.cpp
index 33e2b5197..03c43fb4f 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -16,14 +16,13 @@
                                                            // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME
 #include "plssvm/detail/utility.hpp"                       // PLSSVM_IS_DEFINED
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-#include <hpx/hpx_start.hpp> 
-#endif
-
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
-
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    #include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
+    #include <hpx/execution.hpp>                                    // hpx::post
+#endif
 #include <algorithm>    // std::for_each
 #include <chrono>       // std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace
 #include <cstddef>      // std::size_t
@@ -57,11 +56,6 @@ int main(int argc, char *argv[]) {
         // parse SVM parameter from command line
         plssvm::detail::cmd::parser_train cmd_parser{ argc, argv };
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-       // Initialize HPX, don't run hpx_main
-        hpx::start(nullptr, argc, argv); 
-#endif
-
         // send warning if the build type is release and assertions are enabled
         if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) {
             plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning,
@@ -74,6 +68,14 @@ int main(int argc, char *argv[]) {
                             "\ntask: training\n{}\n\n\n",
                             plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser });
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+        const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+        if (use_hpx_as_backend){
+            // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
+            // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
+            hpx::start(nullptr, 0, nullptr);
+        }
+#endif
         // create data set
         const auto data_set_visitor = [&](auto &&data) {
             using label_type = typename std::remove_reference_t<decltype(data)>::label_type;
@@ -88,6 +90,14 @@ int main(int argc, char *argv[]) {
             const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type)
                                                                           : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params);
 
+#if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
+            // initialize hardware sampling
+            std::vector<std::unique_ptr<plssvm::detail::tracking::hardware_sampler>> sampler =
+                plssvm::detail::tracking::create_hardware_sampler(svm->get_target_platform(), svm->num_available_devices(), PLSSVM_HARDWARE_SAMPLING_INTERVAL);
+            // start sampling
+            std::for_each(sampler.begin(), sampler.end(), std::mem_fn(&plssvm::detail::tracking::hardware_sampler::start_sampling));
+#endif
+
             // only specify plssvm::max_iter if it isn't its default value
             const plssvm::model<label_type> model =
                 cmd_parser.max_iter == std::size_t{ 0 }
@@ -102,7 +112,15 @@ int main(int argc, char *argv[]) {
                                plssvm::solver = cmd_parser.solver);
             // save model to file
             model.save(cmd_parser.model_filename);
-        };
+
+#if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
+            // stop sampling
+            std::for_each(sampler.begin(), sampler.end(), std::mem_fn(&plssvm::detail::tracking::hardware_sampler::stop_sampling));
+            // write samples to yaml file
+            std::for_each(sampler.cbegin(), sampler.cend(), [&](const std::unique_ptr<plssvm::detail::tracking::hardware_sampler> &s) {
+                PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY(*s);
+            });
+#endif
         std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser));
 
         // stop CPU hardware sampler and dump results if available
@@ -118,6 +136,14 @@ int main(int argc, char *argv[]) {
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+        if (use_hpx_as_backend){
+            // Finalize all existing HPX tasks
+            hpx::post([]{hpx::finalize();});
+            // Stop HPX runtime
+            hpx::stop();
+        }
+#endif
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
@@ -125,11 +151,5 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    // TODO: hpx::finalize has to be called from the HPX runtime before hpx::stop
-    // hpx::post([]() { hpx::finalize(); });
-    return hpx::stop();
-#else
     return EXIT_SUCCESS;
-#endif
 }
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
index fba56e333..b682f63a6 100644
--- a/src/plssvm/backends/HPX/csvm.cpp
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -84,6 +84,7 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const
     PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
 
     std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices());
+    ::hpx::future<void> wait = ::hpx::async([&](){
     const real_type cost = real_type{ 1.0 } / params.cost;
 
     switch (solver) {
@@ -125,7 +126,9 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const
             }
             break;
     }
-
+    });
+    // wait until operation is completed
+    wait.get();
     return kernel_matrices_parts;
 }
 
@@ -139,6 +142,7 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s
     PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape());
     PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
 
+    ::hpx::future<void> wait = ::hpx::async([&](){
     switch (solver) {
         case solver_type::automatic:
             // unreachable
@@ -184,6 +188,9 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s
             }
             break;
     }
+    });
+    // wait until operation is completed
+    wait.get();
 }
 
 //***************************************************//
@@ -216,7 +223,8 @@ aos_matrix<real_type> csvm::predict_values(const parameter &params,
 
     // num_predict_points x num_classes
     aos_matrix<real_type> out{ plssvm::shape{ num_predict_points, num_classes }, real_type{ 0.0 }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
-
+    
+    ::hpx::future<void> wait = ::hpx::async([&](){
     if (params.kernel_type == kernel_function_type::linear) {
         // special optimization for the linear kernel function
         if (w.empty()) {
@@ -248,7 +256,9 @@ aos_matrix<real_type> csvm::predict_values(const parameter &params,
             detail::device_kernel_predict<kernel_function_type::chi_squared>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
             break;
     }
-
+    });
+    // wait until operation is completed
+    wait.get();
     return out;
 }
 
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index 7782b379c..c6cdcb970 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -6,7 +6,7 @@
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
  */
-
+#include <hpx/runtime_distributed.hpp>
 #include "plssvm/backends/HPX/detail/utility.hpp"
 
 #include "plssvm/detail/string_utility.hpp"  // plssvm::detail::as_lower_case
@@ -27,7 +27,6 @@ std::string get_hpx_version() {
 // TODO: implement function
 int get_num_threads() {
     // get the number of used HPX threads
-    int num_hpx_threads{-1};
-    return num_hpx_threads;
+    return static_cast<int>(::hpx::get_num_worker_threads());
 }
 }  // namespace plssvm::hpx::detail

From bea35d3c895372b6662a00681375fe5d53cf3456 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:19 +0100
Subject: [PATCH 11/48] Add HPX runtime to predict

---
 src/main_predict.cpp | 35 ++++++++++++++++++++---------------
 src/main_train.cpp   |  1 +
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 90ae575a9..e45554205 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -17,14 +17,13 @@
                                                            // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME
 #include "plssvm/detail/utility.hpp"                       // PLSSVM_IS_DEFINED
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-#include <hpx/hpx_start.hpp> 
-#endif
-
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
-
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    #include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
+    #include <hpx/execution.hpp>                                    // hpx::post
+#endif
 #include "fmt/format.h"  // fmt::print
 #include "fmt/os.h"      // fmt::ostream, fmt::output_file
 #include "fmt/ranges.h"  // fmt::join
@@ -60,10 +59,6 @@ int main(int argc, char *argv[]) {
         // parse SVM parameter from command line
         const plssvm::detail::cmd::parser_predict cmd_parser{ argc, argv };
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-       // Initialize HPX, don't run hpx_main
-        hpx::start(nullptr, argc, argv); 
-#endif
         // send warning if the build type is release and assertions are enabled
         if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) {
             plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning,
@@ -76,6 +71,14 @@ int main(int argc, char *argv[]) {
                             "\ntask: prediction\n{}\n",
                             plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser });
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+        const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+        if (use_hpx_as_backend){
+            // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
+            // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
+            hpx::start(nullptr, 0, nullptr);
+        }
+#endif
         // create data set
         const auto data_set_visitor = [&](auto &&data) {
             using label_type = typename std::remove_reference_t<decltype(data)>::label_type;
@@ -176,6 +179,14 @@ int main(int argc, char *argv[]) {
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+        if (use_hpx_as_backend){
+            // Finalize all existing HPX tasks
+            hpx::post([]{hpx::finalize();});
+            // Stop HPX runtime
+            hpx::stop();
+        }
+#endif
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
@@ -183,11 +194,5 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    // TODO: hpx::finalize has to be called from the HPX runtime before hpx::stop
-    // hpx::post([]() { hpx::finalize(); });
-    return hpx::stop();
-#else
     return EXIT_SUCCESS;
-#endif
 }
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 03c43fb4f..d7c8ce105 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -151,5 +151,6 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
+
     return EXIT_SUCCESS;
 }

From 43274dd7b909e392adfeec63afe0b909aa3575c1 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:19 +0100
Subject: [PATCH 12/48] Change pointer call by value to references

---
 .../backends/HPX/kernel/cg_explicit/blas.hpp  | 10 ++++----
 .../cg_explicit/kernel_matrix_assembly.hpp    | 11 +++++----
 .../kernel_matrix_assembly_blas.hpp           | 12 +++++-----
 .../backends/HPX/kernel/predict_kernel.hpp    | 24 +++++++++----------
 4 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
index 68bc0b239..67b99b764 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -59,7 +59,7 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
         range[i] = std::make_pair(i / blocked_num_rows, i % blocked_num_rows);
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, A_ptr = A.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [rhs, row] = idx;
         const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
@@ -79,11 +79,11 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
                     real_type A_val = 0.0;
                     // determine on which side of the diagonal we are located
                     if (dim < global_j) {
-                        A_val = A_ptr[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }];
+                        A_val = A.data()[dim * (num_rows + PADDING_SIZE_uz) + global_j - dim * (dim + std::size_t{ 1 }) / std::size_t{ 2 }];
                     } else {
-                        A_val = A_ptr[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }];
+                        A_val = A.data()[global_j * (num_rows + PADDING_SIZE_uz) + dim - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }];
                     }
-                    temp[internal_i][internal_j] += A_val * B_ptr[dim * (num_rhs + PADDING_SIZE_uz) + global_i];
+                    temp[internal_i][internal_j] += A_val * B.data()[dim * (num_rhs + PADDING_SIZE_uz) + global_i];
                 }
             }
         }
@@ -96,7 +96,7 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
 
                 // be sure to not perform out of bounds accesses
                 if (global_i < num_rhs && global_j < num_rows) {
-                    C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C_ptr[global_j * (num_rhs + PADDING_SIZE_uz) + global_i];
+                    C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i] = alpha * temp[internal_i][internal_j] + beta * C.data()[global_j * (num_rhs + PADDING_SIZE_uz) + global_i];
                 }
             }
         }
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index eab768662..e41689661 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -67,7 +67,10 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
         }
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), kernel_matrix_ptr = kernel_matrix.data()](const std::pair<std::size_t, std::size_t> idx) {
+ // ::hpx::experimental::for_loop(::hpx::execution::par, 0, range.size(), [&](const std::size_t idx){
+ //        // calculate the indices used in the current thread
+ //        const auto [row, col] = range[idx];
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -85,7 +88,7 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
                     const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
                     const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
 
-                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
                 }
             }
         }
@@ -100,12 +103,12 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
                 // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
                 if (global_row < dept && global_col < dept && global_row >= global_col) {
                     real_type temp_ij = temp[internal_row][internal_col];
-                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col];
+                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
                     // apply the cost on the diagonal
                     if (global_row == global_col) {
                         temp_ij += cost;
                     }
-                    kernel_matrix_ptr[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij;
+                    kernel_matrix[global_col * (dept + PADDING_SIZE_uz) + global_row - global_col * (global_col + std::size_t{ 1 }) / std::size_t{ 2 }] = temp_ij;
                 }
             }
         }
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 5db6035a4..42d3da05d 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -82,7 +82,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
         }
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, q_ptr = q.data(), data_ptr = data.data(), B_ptr = B.data(), C_ptr = C.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [row, col] = idx;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
@@ -99,7 +99,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
                     const std::size_t global_row = row_idx + static_cast<std::size_t>(internal_row);
                     const std::size_t global_col = col_idx + static_cast<std::size_t>(internal_col);
 
-                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data_ptr[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
+                    temp[internal_row][internal_col] += detail::feature_reduce<kernel>(data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_row], data.data()[dim * (dept + 1 + PADDING_SIZE_uz) + global_col]);
                 }
             }
         }
@@ -113,20 +113,20 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
                 // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix)
                 if (global_row < dept && global_col < dept && global_row >= global_col) {
                     real_type temp_ij = temp[internal_row][internal_col];
-                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q_ptr[global_row] - q_ptr[global_col];
+                    temp_ij = detail::apply_kernel_function<kernel>(temp_ij, kernel_function_parameter...) + QA_cost - q[global_row] - q[global_col];
                     // apply the cost on the diagonal
                     if (global_row == global_col) {
                         temp_ij += cost;
                         // calculate the values of alpha * A * B
                         for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
-                            atomic_ref<real_type>{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+                            atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
                         }
                     } else {
                         // calculate the values of alpha * A * B
                         for (std::size_t class_idx = 0; class_idx < num_classes; ++class_idx) {
-                            atomic_ref<real_type>{ C_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx];
+                            atomic_ref<real_type>{ C.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx];
                             // symmetry
-                            atomic_ref<real_type>{ C_ptr[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B_ptr[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
+                            atomic_ref<real_type>{ C.data()[global_col * (num_classes + PADDING_SIZE_uz) + class_idx] } += alpha * temp_ij * B.data()[global_row * (num_classes + PADDING_SIZE_uz) + class_idx];
                         }
                     }
                 }
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
index 5da57b09d..82085d465 100644
--- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -60,7 +60,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, w_ptr = w.data(), alpha_ptr = alpha.data(), sv_ptr = support_vectors.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [feature, c] = idx;
         const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
@@ -77,7 +77,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
                     const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
                     const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
 
-                    temp[internal_feature][internal_class] += alpha_ptr[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * sv_ptr[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv];
+                    temp[internal_feature][internal_class] += alpha.data()[global_class_idx * (num_support_vectors + PADDING_SIZE_uz) + sv] * support_vectors.data()[global_feature_idx * (num_support_vectors + PADDING_SIZE_uz) + sv];
                 }
             }
         }
@@ -88,7 +88,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
                 const std::size_t global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
                 const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
 
-                w_ptr[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
+                w.data()[global_feature_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_feature][internal_class];
             }
         }
     });
@@ -123,7 +123,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
         range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), w_ptr = w.data(), rho_ptr = rho.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, c] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -140,7 +140,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
                     const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
                     const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
 
-                    temp[internal_pp][internal_class] += w_ptr[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * pp_ptr[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx];
+                    temp[internal_pp][internal_class] += w.data()[dim * (num_classes + PADDING_SIZE_uz) + global_class_idx] * predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx];
                 }
             }
         }
@@ -152,7 +152,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
                 const std::size_t global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
 
                 if (global_pp_idx < num_predict_points && global_class_idx < num_classes) {
-                    prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho_ptr[global_class_idx];
+                    prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + global_class_idx] = temp[internal_pp][internal_class] - rho.data()[global_class_idx];
                 }
             }
         }
@@ -195,7 +195,7 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
         range[i] = std::make_pair(i / blocked_num_support_vectors, i % blocked_num_support_vectors);
     });
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [=, prediction_ptr = prediction.data(), alpha_ptr = alpha.data(), rho_ptr = rho.data(), sv_ptr = support_vectors.data(), pp_ptr = predict_points.data()](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
         // calculate the indices used in the current thread
         const auto [pp, sv] = idx;
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
@@ -212,8 +212,8 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
                     const std::size_t global_pp_idx = pp_idx + static_cast<std::size_t>(internal_pp);
                     const std::size_t global_sv_idx = sv_idx + static_cast<std::size_t>(internal_sv);
 
-                    temp[internal_pp][internal_sv] += detail::feature_reduce<kernel>(sv_ptr[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx],
-                                                                                     pp_ptr[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]);
+                    temp[internal_pp][internal_sv] += detail::feature_reduce<kernel>(support_vectors.data()[dim * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx],
+                                                                                     predict_points.data()[dim * (num_predict_points + PADDING_SIZE_uz) + global_pp_idx]);
                 }
             }
         }
@@ -235,10 +235,10 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
                     // be sure to not perform out of bounds accesses
                     if (global_pp_idx < num_predict_points && global_sv_idx < num_support_vectors) {
                         if (global_sv_idx == 0) {
-                            atomic_ref<real_type>{ prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho_ptr[a];
+                            atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } += -rho.data()[a];
                         }
-                        atomic_ref<real_type>{ prediction_ptr[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } +=
-                            temp[internal_pp][internal_sv] * alpha_ptr[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx];
+                        atomic_ref<real_type>{ prediction.data()[global_pp_idx * (num_classes + PADDING_SIZE_uz) + a] } +=
+                            temp[internal_pp][internal_sv] * alpha.data()[a * (num_support_vectors + PADDING_SIZE_uz) + global_sv_idx];
                     }
                 }
             }

From 9f4ca905d7094916ba5dbf4e96773d0dd0f504e1 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:19 +0100
Subject: [PATCH 13/48] Do index computation on the fly like stdpar

---
 .../backends/HPX/kernel/cg_explicit/blas.hpp  | 23 +++++----
 .../cg_explicit/kernel_matrix_assembly.hpp    | 31 +++++-------
 .../kernel_matrix_assembly_blas.hpp           | 27 ++++-------
 .../backends/HPX/kernel/predict_kernel.hpp    | 47 +++++++++----------
 src/main_predict.cpp                          |  1 +
 5 files changed, 56 insertions(+), 73 deletions(-)

diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
index 67b99b764..8bd36f102 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -18,13 +18,13 @@
 #include "plssvm/detail/assert.hpp"  // PLSSVM_ASSERT
 #include "plssvm/matrix.hpp"         // plssvm::soa_matrix
 #include "plssvm/shape.hpp"          // plssvm::shape
-#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
-#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+
+#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
 #include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
 #include <array>      // std::array
 #include <cmath>      // std::ceil
 #include <cstddef>    // std::size_t
-#include <utility>    // std::pair, std::make_pair
+#include <numeric>    // std::iota
 #include <vector>     // std::vector
 
 namespace plssvm::hpx::detail {
@@ -51,17 +51,16 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
     // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
+    
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_num_rhs * blocked_num_rows);   // define range over which should be iterated
+    std::iota(range.begin(), range.end(), 0);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_rhs * blocked_num_rows);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i)
-    {
-        range[i] = std::make_pair(i / blocked_num_rows, i % blocked_num_rows);
-    });
-
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [rhs, row] = idx;
+        const std::size_t rhs = idx / blocked_num_rows;
+        const std::size_t row = idx % blocked_num_rows;
+
         const std::size_t rhs_idx = rhs * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index e41689661..f94ff1c1a 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -20,13 +20,12 @@
 #include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
 #include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix
 
-#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
-#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
-#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
+#include <hpx/execution.hpp>                                // hpx::execution::par_unseq
+#include <hpx/parallel/segmented_algorithms/for_each.hpp>   // hpx::for_each
 #include <array>      // std::array
-#include <cmath>      // std::ceil
+#include <cmath>      // std::ceil, std::sqrt
 #include <cstddef>    // std::size_t
-#include <utility>    // std::pair, std::make_pair
+#include <numeric>    // std::iota
 #include <vector>     // std::vector
 
 namespace plssvm::hpx::detail {
@@ -56,23 +55,15 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i){
-        const std::size_t row = i / blocked_dept;
-        const std::size_t col = i % blocked_dept;
-        // only create valid row <-> col index pairs
-        if (row >= col) {
-            range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
-        }
-    });
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+    std::iota(range.begin(), range.end(), 0);
 
- // ::hpx::experimental::for_loop(::hpx::execution::par, 0, range.size(), [&](const std::size_t idx){
- //        // calculate the indices used in the current thread
- //        const auto [row, col] = range[idx];
- ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+ ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [row, col] = idx;
+        const std::size_t col = static_cast<std::size_t>(0.5 * (2 * blocked_dept + 1 - std::sqrt(4 * blocked_dept * blocked_dept + 4 * blocked_dept - 8 * idx + 1)));
+        const std::size_t row = static_cast<std::size_t>(idx - (col * blocked_dept - 0.5 * col * col - 0.5 * col));
+       
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 42d3da05d..5c1cd6d3f 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -23,13 +23,12 @@
 #include "plssvm/kernel_functions.hpp"                         // plssvm::kernel_function
 #include "plssvm/matrix.hpp"                                   // aos_matrix
 
-#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
-#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
 #include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
 #include <array>      // std::array
 #include <cmath>      // std::ceil
-#include <cstddef>    // std::size_t
-#include <utility>    // std::pair, std::make_pair
+#include <cstddef>    // std::size_t, std::sqrt
+#include <numeric>    // std::iota
 #include <vector>     // std::vector
 
 namespace plssvm::hpx::detail {
@@ -70,21 +69,15 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_dept * (blocked_dept + 1) / 2);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, blocked_dept * blocked_dept, [&](auto i)
-    {
-        const std::size_t row = i / blocked_dept;
-        const std::size_t col = i % blocked_dept;
-        // only create valid row <-> col index pairs
-        if (row >= col) {
-            range[col * blocked_dept + row - col * (col + 1) / 2] = std::make_pair(row, col);
-        }
-    });
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
+    std::iota(range.begin(), range.end(), 0);
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [row, col] = idx;
+        const std::size_t col = static_cast<std::size_t>(0.5 * (2 * blocked_dept + 1 - std::sqrt(4 * blocked_dept * blocked_dept + 4 * blocked_dept - 8 * idx + 1)));
+        const std::size_t row = static_cast<std::size_t>(idx - (col * blocked_dept - 0.5 * col * col - 0.5 * col));
+
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
index 82085d465..03a91dd9f 100644
--- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -22,13 +22,12 @@
 #include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix, plssvm::soa_matrix
 #include "plssvm/shape.hpp"                                    // plssvm::shape
 
-#include <hpx/execution.hpp>                        // hpx::execution::par_unseq
-#include <hpx/parallel/algorithms/for_loop.hpp>     // hpx::experimental::for_loop
+#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
 #include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
 #include <array>      // std::array
 #include <cmath>      // std::fma
 #include <cstddef>    // std::size_t
-#include <utility>    // std::pair, std::make_pair
+#include <numeric>    // std::iota
 #include <vector>     // std::vector
 
 namespace plssvm::hpx::detail {
@@ -54,15 +53,15 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_features * blocked_num_classes);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
-        range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_num_features * blocked_num_classes);
+    std::iota(range.begin(), range.end(), 0);
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [feature, c] = idx;
+        const std::size_t feature = idx / blocked_num_classes;
+        const std::size_t c = idx % blocked_num_classes;
+      
         const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
 
@@ -117,15 +116,15 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_classes);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
-        range[i] = std::make_pair(i / blocked_num_classes, i % blocked_num_classes);
-    });
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_classes);
+    std::iota(range.begin(), range.end(), 0);
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [pp, c] = idx;
+        const std::size_t pp = idx / blocked_num_classes;
+        const std::size_t c = idx % blocked_num_classes;
+       
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
 
@@ -189,15 +188,15 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
 
-    // calculate indices over which we parallelize
-    std::vector<std::pair<std::size_t, std::size_t>> range(blocked_num_predict_points * blocked_num_support_vectors);
-    ::hpx::experimental::for_loop(::hpx::execution::par_unseq, 0, range.size(), [&](auto i){
-        range[i] = std::make_pair(i / blocked_num_support_vectors, i % blocked_num_support_vectors);
-    });
+    // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_num_predict_points * blocked_num_support_vectors);
+    std::iota(range.begin(), range.end(), 0);
 
-    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::pair<std::size_t, std::size_t> idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const auto [pp, sv] = idx;
+        const std::size_t pp = idx / blocked_num_support_vectors;
+        const std::size_t sv = idx % blocked_num_support_vectors;
+
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t sv_idx = sv * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index e45554205..6edcbb145 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -194,5 +194,6 @@ int main(int argc, char *argv[]) {
         std::cerr << e.what() << std::endl;
         return EXIT_FAILURE;
     }
+
     return EXIT_SUCCESS;
 }

From d2157fcad326aaef101b75fca520c2d2ff576073 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:19 +0100
Subject: [PATCH 14/48] First test implementation

---
 tests/backends/CMakeLists.txt         |   9 +-
 tests/backends/HPX/CMakeLists.txt     |  31 ++
 tests/backends/HPX/detail/utility.cpp |  26 ++
 tests/backends/HPX/exceptions.cpp     |  26 ++
 tests/backends/HPX/hpx_csvm.cpp       | 601 ++++++++++++++++++++++++++
 tests/backends/HPX/mock_hpx_csvm.hpp  |  48 ++
 tests/backends/generic_csvm_tests.hpp |  20 +-
 tests/main.cpp                        |   5 +
 8 files changed, 754 insertions(+), 12 deletions(-)
 create mode 100644 tests/backends/HPX/CMakeLists.txt
 create mode 100644 tests/backends/HPX/detail/utility.cpp
 create mode 100644 tests/backends/HPX/exceptions.cpp
 create mode 100644 tests/backends/HPX/hpx_csvm.cpp
 create mode 100644 tests/backends/HPX/mock_hpx_csvm.hpp

diff --git a/tests/backends/CMakeLists.txt b/tests/backends/CMakeLists.txt
index 805e8bc1b..ec6a5fa76 100644
--- a/tests/backends/CMakeLists.txt
+++ b/tests/backends/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
 ## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
 ## License: This file is part of the PLSSVM project which is released under the MIT license.
 ##          See the LICENSE.md file in the project root for full license information.
@@ -9,6 +9,11 @@ if (TARGET ${PLSSVM_OPENMP_BACKEND_LIBRARY_NAME})
     add_subdirectory(OpenMP)
 endif ()
 
+# create HPX tests if the HPX backend is available
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+    add_subdirectory(HPX)
+endif ()
+
 # create stdpar tests if the stdpar backend is available
 if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
     add_subdirectory(stdpar)
@@ -32,4 +37,4 @@ endif ()
 # create SYCL tests if the SYCL backend is available
 if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME})
     add_subdirectory(SYCL)
-endif ()
\ No newline at end of file
+endif ()
diff --git a/tests/backends/HPX/CMakeLists.txt b/tests/backends/HPX/CMakeLists.txt
new file mode 100644
index 000000000..d24f26e83
--- /dev/null
+++ b/tests/backends/HPX/CMakeLists.txt
@@ -0,0 +1,31 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+##          See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+## create HPX tests
+set(PLSSVM_HPX_TEST_NAME HPX_tests)
+
+# list all necessary sources
+set(PLSSVM_HPX_TEST_SOURCES
+    ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/hpx_csvm.cpp
+)
+
+# add test executable
+add_executable(${PLSSVM_HPX_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_HPX_TEST_SOURCES})
+
+# link against test library
+target_link_libraries(${PLSSVM_HPX_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME})
+
+# add tests to google test
+include(GoogleTest)
+include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake)
+discover_tests_with_death_test_filter(${PLSSVM_HPX_TEST_NAME})
+
+# add test as coverage dependency
+if (TARGET coverage)
+    add_dependencies(coverage ${PLSSVM_HPX_TEST_NAME})
+endif ()
diff --git a/tests/backends/HPX/detail/utility.cpp b/tests/backends/HPX/detail/utility.cpp
new file mode 100644
index 000000000..02e84d300
--- /dev/null
+++ b/tests/backends/HPX/detail/utility.cpp
@@ -0,0 +1,26 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Tests for the custom utility functions related to the HPX backend.
+ */
+
+#include "plssvm/backends/HPX/detail/utility.hpp"
+
+#include "gtest/gtest.h"  // TEST, EXPECT_EQ, EXPECT_NE, EXPECT_NO_THROW, EXPECT_FALSE
+
+#include <regex>   // std::regex, std::regex::extended, std::regex_match
+#include <string>  // std::string
+
+TEST(HPXUtility, get_num_threads) {
+    // Will fail but need to think about a way so set correct result
+    EXPECT_GT(plssvm::hpx::detail::get_num_threads(), 0);
+}
+
+TEST(HPXUtility, get_hpx_version) {
+    EXPECT_FALSE(plssvm::hpx::detail::get_hpx_version().empty());
+}
diff --git a/tests/backends/HPX/exceptions.cpp b/tests/backends/HPX/exceptions.cpp
new file mode 100644
index 000000000..123c1000e
--- /dev/null
+++ b/tests/backends/HPX/exceptions.cpp
@@ -0,0 +1,26 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Tests for the custom exception classes related to the HPX backend.
+ */
+
+#include "plssvm/backends/HPX/exceptions.hpp"  // plssvm::hpx::backend_exception
+
+#include "tests/backends/generic_exceptions_tests.hpp"  // generic exception tests to instantiate
+
+#include "gtest/gtest.h"  // INSTANTIATE_TYPED_TEST_SUITE_P
+
+#include <string_view>  // std::string_view
+
+struct exception_test_type {
+    using exception_type = plssvm::hpx::backend_exception;
+    constexpr static std::string_view name = "hpx::backend_exception";
+};
+
+// instantiate type-parameterized tests
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXPExceptions, Exception, exception_test_type);
diff --git a/tests/backends/HPX/hpx_csvm.cpp b/tests/backends/HPX/hpx_csvm.cpp
new file mode 100644
index 000000000..6797f2544
--- /dev/null
+++ b/tests/backends/HPX/hpx_csvm.cpp
@@ -0,0 +1,601 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Tests for the functionality related to the HPX backend.
+ */
+
+#include "plssvm/backend_types.hpp"                                                   // plssvm::csvm_to_backend_type_v
+#include "plssvm/backends/HPX/csvm.hpp"                                            // plssvm::hpx::csvm
+#include "plssvm/backends/HPX/exceptions.hpp"                                      // plssvm::hpx::backend_exception
+#include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::device_kernel_symm
+#include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::device_kernel_assembly
+#include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::device_kernel_assembly_symm
+#include "plssvm/backends/HPX/kernel/predict_kernel.hpp"                           // plssvm::hpx::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
+#include "plssvm/constants.hpp"                                                       // plssvm::PADDING_SIZE
+#include "plssvm/data_set.hpp"                                                        // plssvm::data_set
+#include "plssvm/detail/arithmetic_type_name.hpp"                                     // plssvm::detail::arithmetic_type_name
+#include "plssvm/detail/data_distribution.hpp"                                        // plssvm::detail::triangular_data_distribution
+#include "plssvm/detail/type_list.hpp"                                                // plssvm::detail::supported_label_types
+#include "plssvm/kernel_function_types.hpp"                                           // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                                          // plssvm::soa_matrix
+#include "plssvm/parameter.hpp"                                                       // plssvm::parameter, plssvm::detail::parameter, plssvm::kernel_type, plssvm::cost
+#include "plssvm/shape.hpp"                                                           // plssvm::shape
+#include "plssvm/target_platforms.hpp"                                                // plssvm::target_platform
+
+#include "tests/backends/generic_csvm_tests.hpp"       // generic CSVM tests to instantiate
+#include "tests/backends/ground_truth.hpp"             // ground_truth::{perform_dimensional_reduction, assemble_device_specific_kernel_matrix, assemble_full_kernel_matrix, gemm, calculate_w}
+#include "tests/backends/HPX/mock_hpx_csvm.hpp"  // mock_hpx_csvm
+#include "tests/custom_test_macros.hpp"                // EXPECT_THROW_WHAT
+#include "tests/naming.hpp"                            // naming::test_parameter_to_name
+#include "tests/types_to_test.hpp"                     // util::{cartesian_type_product_t, combine_test_parameters_gtest_t}
+#include "tests/utility.hpp"                           // util::redirect_output
+
+#include "fmt/format.h"   // fmt::format
+#include "gtest/gtest.h"  // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test
+
+#include <algorithm>  // std::min
+#include <cstddef>    // std::size_t
+#include <tuple>      // std::make_tuple, std::tuple
+#include <vector>     // std::vector
+
+class HPXCSVM : public ::testing::Test,
+                   private util::redirect_output<> { };
+
+// check whether the constructor correctly fails when using an incompatible target platform
+TEST_F(HPXCSVM, construct_parameter) {
+#if defined(PLSSVM_HAS_CPU_TARGET)
+    // the automatic target platform must always be available
+    EXPECT_NO_THROW(plssvm::hpx::csvm{ plssvm::parameter{} });
+#else
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::parameter{} }),
+                      plssvm::hpx::backend_exception,
+                      "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+#endif
+}
+
+TEST_F(HPXCSVM, construct_target_and_parameter) {
+    // create parameter struct
+    const plssvm::parameter params{};
+
+#if defined(PLSSVM_HAS_CPU_TARGET)
+    // only automatic or cpu are allowed as target platform for the HPX backend
+    EXPECT_NO_THROW((plssvm::hpx::csvm{ plssvm::target_platform::automatic, params }));
+    EXPECT_NO_THROW((plssvm::hpx::csvm{ plssvm::target_platform::cpu, params }));
+#else
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::automatic, params }),
+                      plssvm::hpx::backend_exception,
+                      "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::cpu, params }),
+                      plssvm::hpx::backend_exception,
+                      "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+#endif
+
+    // all other target platforms must throw
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_nvidia, params }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_nvidia' for the HPX backend!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_amd, params }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_amd' for the HPX backend!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_intel, params }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_intel' for the HPX backend!");
+}
+
+TEST_F(HPXCSVM, construct_target_and_named_args) {
+#if defined(PLSSVM_HAS_CPU_TARGET)
+    // only automatic or cpu are allowed as target platform for the HPX backend
+    EXPECT_NO_THROW((plssvm::hpx::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }));
+    EXPECT_NO_THROW((plssvm::hpx::csvm{ plssvm::target_platform::cpu, plssvm::cost = 2.0 }));
+#else
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }),
+                      plssvm::hpx::backend_exception,
+                      "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::cpu, plssvm::cost = 2.0 }),
+                      plssvm::hpx::backend_exception,
+                      "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!");
+#endif
+
+    // all other target platforms must throw
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::cost = 2.0 }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_nvidia' for the HPX backend!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_amd, plssvm::cost = 2.0 }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_amd' for the HPX backend!");
+    EXPECT_THROW_WHAT((plssvm::hpx::csvm{ plssvm::target_platform::gpu_intel, plssvm::cost = 2.0 }),
+                      plssvm::hpx::backend_exception,
+                      "Invalid target platform 'gpu_intel' for the HPX backend!");
+}
+
+struct hpx_csvm_test_type {
+    using mock_csvm_type = mock_hpx_csvm;
+    using csvm_type = plssvm::hpx::csvm;
+    using device_ptr_type = const plssvm::soa_matrix<plssvm::real_type> *;
+    inline constexpr static auto additional_arguments = std::make_tuple();
+};
+
+using hpx_csvm_test_tuple = std::tuple<hpx_csvm_test_type>;
+using hpx_csvm_test_label_type_list = util::cartesian_type_product_t<hpx_csvm_test_tuple, plssvm::detail::supported_label_types>;
+using hpx_csvm_test_type_list = util::cartesian_type_product_t<hpx_csvm_test_tuple>;
+
+// the tests used in the instantiated GTest test suites
+using hpx_csvm_test_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_type_list>;
+using hpx_solver_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_type_list, util::solver_type_list>;
+using hpx_kernel_function_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_type_list, util::kernel_function_type_list>;
+using hpx_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_type_list, util::solver_and_kernel_function_type_list>;
+using hpx_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_label_type_list, util::kernel_function_and_classification_type_list>;
+using hpx_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t<hpx_csvm_test_label_type_list, util::solver_and_kernel_function_and_classification_type_list>;
+
+// instantiate type-parameterized tests
+// generic CSVM tests
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVM, hpx_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVMKernelFunction, hpx_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVMSolver, hpx_solver_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVMSolverKernelFunction, hpx_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVMKernelFunctionClassification, hpx_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVM, GenericCSVMSolverKernelFunctionClassification, hpx_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name);
+
+// generic CSVM DeathTests
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVMDeathTest, GenericCSVMDeathTest, hpx_csvm_test_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVMDeathTest, GenericCSVMSolverDeathTest, hpx_solver_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, hpx_kernel_function_type_gtest, naming::test_parameter_to_name);
+INSTANTIATE_TYPED_TEST_SUITE_P(HPXCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, hpx_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name);
+
+TEST_F(HPXCSVM, blas_level_3_kernel_explicit) {
+    const plssvm::real_type alpha{ 1.0 };
+
+    // create kernel matrix to use in the BLAS calculation
+    const plssvm::parameter params{ plssvm::gamma = plssvm::real_type{ 0.001 } };
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+    const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data.data());
+
+    // create correct data distribution for the ground truth calculation
+    const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 };
+    const std::vector<plssvm::real_type> kernel_matrix = ground_truth::assemble_device_specific_kernel_matrix(params, data.data(), q_red, QA_cost, dist, 0);
+
+    const auto B = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    const plssvm::real_type beta{ 0.5 };
+    auto C = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    auto ground_truth_C{ C };
+
+    const std::size_t num_rhs = B.shape().x;
+    const std::size_t num_rows = B.shape().y;
+    plssvm::hpx::detail::device_kernel_symm(num_rows, num_rhs, alpha, kernel_matrix, B, beta, C);
+
+    // calculate correct results
+    const plssvm::aos_matrix<plssvm::real_type> kernel_matrix_gemm_padded = ground_truth::assemble_full_kernel_matrix(params, data.data(), q_red, QA_cost);
+    ground_truth::gemm(alpha, kernel_matrix_gemm_padded, B, beta, ground_truth_C);
+
+    // check C for correctness
+    EXPECT_FLOATING_POINT_MATRIX_NEAR(C, ground_truth_C);
+}
+
+TEST_F(HPXCSVM, calculate_w) {
+    // the data used for prediction
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+
+    // the weights (i.e., alpha values) for all support vectors
+    const auto weights = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data.num_data_points() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    // calculate w
+    plssvm::soa_matrix<plssvm::real_type> w{ plssvm::shape{ weights.num_rows(), data.data().num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } };
+    plssvm::hpx::detail::device_kernel_w_linear(w, weights, data.data());
+
+    // calculate correct results
+    const plssvm::soa_matrix<plssvm::real_type> correct_w = ground_truth::calculate_w(weights, data.data());
+
+    // check C for correctness
+    EXPECT_FLOATING_POINT_MATRIX_NEAR(w, correct_w);
+}
+
+using kernel_function_type_list_gtest = util::combine_test_parameters_gtest_t<util::kernel_function_type_list>;
+
+template <typename T>
+class HPXCSVMKernelFunction : public HPXCSVM { };
+
+TYPED_TEST_SUITE(HPXCSVMKernelFunction, kernel_function_type_list_gtest, naming::test_parameter_to_name);
+
+TYPED_TEST(HPXCSVMKernelFunction, assemble_kernel_matrix_explicit) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+    auto data_matr{ data.data() };
+    if constexpr (kernel == plssvm::kernel_function_type::chi_squared) {
+        // chi-squared is well-defined for non-negative values only
+        data_matr = util::matrix_abs(data_matr);
+    }
+
+    // create correct data distribution for the ground truth calculation
+    const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 };
+
+    const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data_matr);
+    const plssvm::real_type cost = plssvm::real_type{ 1.0 } / params.cost;
+
+    std::vector<plssvm::real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0));  // only explicitly store the upper triangular matrix
+
+    switch (kernel) {
+        case plssvm::kernel_function_type::linear:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::linear>(q_red, kernel_matrix, data_matr, QA_cost, cost);
+            break;
+        case plssvm::kernel_function_type::polynomial:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::polynomial>(q_red, kernel_matrix, data_matr, QA_cost, cost, params.degree, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::rbf:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::rbf>(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::sigmoid:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::sigmoid>(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::laplacian:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::laplacian>(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::chi_squared:
+            plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::chi_squared>(q_red, kernel_matrix, data_matr, QA_cost, cost, std::get<plssvm::real_type>(params.gamma));
+            break;
+    }
+    const std::vector<plssvm::real_type> correct_kernel_matrix = ground_truth::assemble_device_specific_kernel_matrix(params, data_matr, q_red, QA_cost, dist, 0);
+
+    // check for correctness
+    ASSERT_EQ(kernel_matrix.size(), correct_kernel_matrix.size());
+    EXPECT_FLOATING_POINT_VECTOR_NEAR_EPS(kernel_matrix, correct_kernel_matrix, 1e6);
+}
+
+TYPED_TEST(HPXCSVMKernelFunction, blas_level_3_kernel_implicit) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    const plssvm::real_type alpha{ 1.0 };
+
+    // create kernel matrix to use in the BLAS calculation
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+    auto data_matr{ data.data() };
+    if constexpr (kernel == plssvm::kernel_function_type::chi_squared) {
+        // chi-squared is well-defined for non-negative values only
+        data_matr = util::matrix_abs(data_matr);
+    }
+
+    const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data_matr);
+    const plssvm::real_type cost = plssvm::real_type{ 1.0 } / params.cost;
+
+    const auto B = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    const plssvm::real_type beta{ 0.5 };
+    auto C = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.num_data_points() - 1, data.num_data_points() - 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    auto ground_truth_C{ C };
+
+    switch (kernel) {
+        case plssvm::kernel_function_type::linear:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::linear>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C);
+            break;
+        case plssvm::kernel_function_type::polynomial:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::polynomial>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, params.degree, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::rbf:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::rbf>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::sigmoid:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::sigmoid>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::laplacian:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::laplacian>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::chi_squared:
+            plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::chi_squared>(alpha, q_red, data_matr, QA_cost, cost, B, beta, C, std::get<plssvm::real_type>(params.gamma));
+            break;
+    }
+
+    // calculate correct results
+    const plssvm::aos_matrix<plssvm::real_type> kernel_matrix_gemm_padded = ground_truth::assemble_full_kernel_matrix(params, data_matr, q_red, QA_cost);
+    ground_truth::gemm(alpha, kernel_matrix_gemm_padded, B, beta, ground_truth_C);
+
+    // check C for correctness
+    EXPECT_FLOATING_POINT_MATRIX_NEAR(C, ground_truth_C);
+}
+
+TYPED_TEST(HPXCSVMKernelFunction, predict_values) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+    auto data_matr{ data.data() };
+    if constexpr (kernel == plssvm::kernel_function_type::chi_squared) {
+        // chi-squared is well-defined for non-negative values only
+        data_matr = util::matrix_abs(data_matr);
+    }
+
+    const auto weights = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data_matr.num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    const auto predict_points = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data_matr.num_rows(), data_matr.num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    const std::vector<plssvm::real_type> rho = util::generate_random_vector<plssvm::real_type>(weights.num_rows());
+    const plssvm::soa_matrix<plssvm::real_type> correct_w = ground_truth::calculate_w(weights, data_matr);
+
+    plssvm::aos_matrix<plssvm::real_type> out{ plssvm::shape{ predict_points.num_rows(), weights.num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } };
+
+    switch (kernel) {
+        case plssvm::kernel_function_type::linear:
+            plssvm::hpx::detail::device_kernel_predict_linear(out, correct_w, rho, predict_points);
+            break;
+        case plssvm::kernel_function_type::polynomial:
+            plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::polynomial>(out, weights, rho, data_matr, predict_points, params.degree, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::rbf:
+            plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::rbf>(out, weights, rho, data_matr, predict_points, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::sigmoid:
+            plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::sigmoid>(out, weights, rho, data_matr, predict_points, std::get<plssvm::real_type>(params.gamma), params.coef0);
+            break;
+        case plssvm::kernel_function_type::laplacian:
+            plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::laplacian>(out, weights, rho, data_matr, predict_points, std::get<plssvm::real_type>(params.gamma));
+            break;
+        case plssvm::kernel_function_type::chi_squared:
+            plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::chi_squared>(out, weights, rho, data_matr, predict_points, std::get<plssvm::real_type>(params.gamma));
+            break;
+    }
+
+    // check out for correctness
+    const plssvm::aos_matrix<plssvm::real_type> correct_out = ground_truth::predict_values(params, correct_w, weights, rho, data_matr, predict_points);
+    EXPECT_FLOATING_POINT_MATRIX_NEAR(out, correct_out);
+}
+
+//*************************************************************************************************************************************//
+//                                                           CSVM DeathTests                                                           //
+//*************************************************************************************************************************************//
+
+class HPXCSVMDeathTest : public HPXCSVM { };
+
+TEST_F(HPXCSVMDeathTest, blas_level_3_kernel_explicit) {
+    const plssvm::real_type alpha{ 1.0 };
+
+    // create kernel matrix to use in the BLAS calculation
+    const std::vector<plssvm::real_type> kernel_matrix((4 + plssvm::PADDING_SIZE) * (4 + plssvm::PADDING_SIZE + 1) / 2);
+
+    const auto B = util::generate_random_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    const plssvm::real_type beta{ 0.5 };
+    auto C = util::generate_random_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ 4, 4 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    const std::size_t num_rhs = B.shape().x;
+    const std::size_t num_rows = B.shape().y;
+
+    // the A matrix must have the correct size
+    EXPECT_DEATH(plssvm::hpx::detail::device_kernel_symm(num_rows, num_rows, alpha, std::vector<plssvm::real_type>{}, B, beta, C), fmt::format("A matrix sizes mismatch!: 0 != {}", kernel_matrix.size()));
+
+    // the B matrix must have the correct shape
+    const auto B_wrong = util::generate_random_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ std::min<std::size_t>(0ULL, num_rows - 1), std::min<std::size_t>(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    EXPECT_DEATH(plssvm::hpx::detail::device_kernel_symm(num_rows, num_rows, alpha, kernel_matrix, B_wrong, beta, C), ::testing::HasSubstr(fmt::format("B matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast<int>(num_rows) - 1), std::min(0, static_cast<int>(num_rhs) - 2), num_rows, num_rhs)));
+
+    // the C matrix must have the correct shape
+    auto C_wrong = util::generate_random_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ std::min<std::size_t>(0ULL, num_rows - 1), std::min<std::size_t>(0ULL, num_rhs - 2) }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    EXPECT_DEATH(plssvm::hpx::detail::device_kernel_symm(num_rows, num_rows, alpha, kernel_matrix, B, beta, C_wrong), ::testing::HasSubstr(fmt::format("C matrix sizes mismatch!: [{}, {}] != [{}, {}]", std::min(0, static_cast<int>(num_rows) - 1), std::min(0, static_cast<int>(num_rhs) - 2), num_rows, num_rhs)));
+}
+
+TEST_F(HPXCSVMDeathTest, calculate_w) {
+    // the data used for prediction
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+
+    // the weights (i.e., alpha values) for all support vectors
+    const auto weights = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data.num_data_points() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    plssvm::soa_matrix<plssvm::real_type> w(plssvm::shape{ weights.num_rows(), data.data().num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+
+    // the weights and support vector matrix shapes must match
+    const auto weights_wrong = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data.num_data_points() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    EXPECT_DEATH(plssvm::hpx::detail::device_kernel_w_linear(w, weights_wrong, data.data()), fmt::format("Size mismatch: {} vs {}!", weights_wrong.num_cols(), data.data().num_rows()));
+    // the w shape must be correct
+    plssvm::soa_matrix<plssvm::real_type> w_wrong{};
+    EXPECT_DEATH(plssvm::hpx::detail::device_kernel_w_linear(w_wrong, weights, data.data()), ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs [{}, {}]!", weights.num_rows(), data.data().num_cols())));
+}
+
+template <typename T>
+class HPXCSVMKernelFunctionDeathTest : public HPXCSVM { };
+
+TYPED_TEST_SUITE(HPXCSVMKernelFunctionDeathTest, kernel_function_type_list_gtest, naming::test_parameter_to_name);
+
+TYPED_TEST(HPXCSVMKernelFunctionDeathTest, assemble_kernel_matrix_explicit) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    // create correct data for the function call
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+
+    // create correct data distribution for the ground truth calculation
+    const plssvm::detail::triangular_data_distribution dist{ data.num_data_points() - 1, 1 };
+
+    const auto [q_red, QA_cost] = ground_truth::perform_dimensional_reduction(params, data.data());
+
+    // create correct data distribution for the ground truth calculation
+    std::vector<plssvm::real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0));  // only explicitly store the upper triangular matrix
+
+    // helper lambda to reduce the amount of needed switches!
+    const auto run_assembly = [=](const plssvm::parameter &params_p, const std::vector<plssvm::real_type> &q_red_p, std::vector<plssvm::real_type> &kernel_matrix_p, const plssvm::soa_matrix<plssvm::real_type> &data_p, const plssvm::real_type QA_cost_p) {
+        switch (kernel) {
+            case plssvm::kernel_function_type::linear:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::linear>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost);
+                break;
+            case plssvm::kernel_function_type::polynomial:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::polynomial>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, params_p.degree, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                break;
+            case plssvm::kernel_function_type::rbf:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::rbf>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+            case plssvm::kernel_function_type::sigmoid:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::sigmoid>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                break;
+            case plssvm::kernel_function_type::laplacian:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::laplacian>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+            case plssvm::kernel_function_type::chi_squared:
+                plssvm::hpx::detail::device_kernel_assembly<plssvm::kernel_function_type::chi_squared>(q_red_p, kernel_matrix_p, data_p, QA_cost_p, params_p.cost, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+        }
+    };
+
+    // check q_red size (must be equal to the number of data points - 1
+    EXPECT_DEATH(run_assembly(params, std::vector<plssvm::real_type>{}, kernel_matrix, data.data(), QA_cost), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1));
+
+    // check the kernel matrix size (depending on the usage of GEMM/SYMM)
+    std::vector<plssvm::real_type> ret;
+    EXPECT_DEATH(run_assembly(params, q_red, ret, data.data(), QA_cost), ::testing::HasSubstr(fmt::format("Sizes mismatch (SYMM)!: 0 != {}", kernel_matrix.size())));
+
+    // cost must not be 0.0 since 1.0 / cost is used
+    params.cost = plssvm::real_type{ 0.0 };
+    EXPECT_DEATH(run_assembly(params, q_red, kernel_matrix, data.data(), QA_cost), "cost must not be 0.0 since it is 1 / plssvm::cost!");
+}
+
+TYPED_TEST(HPXCSVMKernelFunctionDeathTest, blas_level_3_kernel_implicit) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    // create correct data for the function call
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+
+    std::vector<plssvm::real_type> q_red{};
+    plssvm::real_type QA_cost{};
+    std::tie(q_red, QA_cost) = ground_truth::perform_dimensional_reduction(params, data.data());
+    const plssvm::real_type alpha{ 1.0 };
+    plssvm::soa_matrix<plssvm::real_type> B{ plssvm::shape{ data.num_classes(), data.num_data_points() - 1 } };
+    const plssvm::real_type beta{ 1.0 };
+    plssvm::soa_matrix<plssvm::real_type> C{ B };
+
+    // helper lambda to reduce the amount of needed switches!
+    const auto run_assembly_symm = [=](const plssvm::parameter &params_p, const std::vector<plssvm::real_type> &q_red_p, const plssvm::soa_matrix<plssvm::real_type> &data_p, const plssvm::soa_matrix<plssvm::real_type> &B_p, plssvm::soa_matrix<plssvm::real_type> &C_p) {
+        switch (kernel) {
+            case plssvm::kernel_function_type::linear:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::linear>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p);
+                break;
+            case plssvm::kernel_function_type::polynomial:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::polynomial>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, params_p.degree, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                break;
+            case plssvm::kernel_function_type::rbf:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::rbf>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+            case plssvm::kernel_function_type::sigmoid:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::sigmoid>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                break;
+            case plssvm::kernel_function_type::laplacian:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::laplacian>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+            case plssvm::kernel_function_type::chi_squared:
+                plssvm::hpx::detail::device_kernel_assembly_symm<plssvm::kernel_function_type::chi_squared>(alpha, q_red_p, data_p, QA_cost, params_p.cost, B_p, beta, C_p, std::get<plssvm::real_type>(params_p.gamma));
+                break;
+        }
+    };
+
+    // check q_red size (must be equal to the number of data points - 1
+    EXPECT_DEATH(run_assembly_symm(params, std::vector<plssvm::real_type>{}, data.data(), B, C), fmt::format("Sizes mismatch!: 0 != {}", data.num_data_points() - 1));
+
+    // cost must not be 0.0 since 1.0 / cost is used
+    plssvm::parameter params2{ params };
+    params2.cost = plssvm::real_type{ 0.0 };
+    EXPECT_DEATH(run_assembly_symm(params2, q_red, data.data(), B, C), "cost must not be 0.0 since it is 1 / plssvm::cost!");
+
+    // B and C must be of the same shape
+    B = plssvm::soa_matrix<plssvm::real_type>{ plssvm::shape{ 1, 1 } };
+    EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), B, C), "The matrices B and C must have the same shape!");
+
+    // the number of columns in B must match the number of rows in the data set - 1
+    B = plssvm::soa_matrix<plssvm::real_type>{ plssvm::shape{ data.num_classes(), data.num_data_points() - 2 } };
+    C = B;
+    EXPECT_DEATH(run_assembly_symm(params, q_red, data.data(), B, C), ::testing::HasSubstr(fmt::format("The number of columns in B ({}) must be the same as the values in q ({})!", B.num_cols(), data.num_data_points() - 1)));
+}
+
+TYPED_TEST(HPXCSVMKernelFunctionDeathTest, predict_values) {
+    constexpr plssvm::kernel_function_type kernel = util::test_parameter_value_at_v<0, TypeParam>;
+
+    plssvm::parameter params{ plssvm::kernel_type = kernel };
+    if constexpr (kernel != plssvm::kernel_function_type::linear) {
+        params.gamma = plssvm::real_type{ 0.001 };
+    }
+    const plssvm::data_set data{ PLSSVM_TEST_FILE };
+
+    const auto weights = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data.data().num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    const auto predict_points = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.data().num_rows(), data.data().num_cols() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+    const std::vector<plssvm::real_type> rho = util::generate_random_vector<plssvm::real_type>(weights.num_rows());
+    const plssvm::soa_matrix<plssvm::real_type> w = ground_truth::calculate_w(weights, data.data());
+
+    plssvm::aos_matrix<plssvm::real_type> out{ plssvm::shape{ predict_points.num_rows(), weights.num_rows() }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } };
+
+    if constexpr (kernel == plssvm::kernel_function_type::linear) {
+        // the number of classes must match
+        std::vector<plssvm::real_type> rho_wrong = util::generate_random_vector<plssvm::real_type>(weights.num_rows());
+        rho_wrong.pop_back();
+        EXPECT_DEATH(plssvm::hpx::detail::device_kernel_predict_linear(out, w, rho_wrong, predict_points),
+                     ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_rows(), rho_wrong.size())));
+
+        // the number of features must match
+        const auto predict_points_wrong = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.data().num_rows(), data.data().num_cols() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+        EXPECT_DEATH(plssvm::hpx::detail::device_kernel_predict_linear(out, w, rho, predict_points_wrong),
+                     ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_cols(), predict_points_wrong.num_cols())));
+
+        // the output shape must match
+        plssvm::aos_matrix<plssvm::real_type> out_wrong{};
+        EXPECT_DEATH(plssvm::hpx::detail::device_kernel_predict_linear(out_wrong, w, rho, predict_points),
+                     ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs {}!", (plssvm::shape{ predict_points.num_rows(), w.num_rows() }))));
+    } else {
+        // helper lambda to reduce the amount of needed switches!
+        const auto run_predict_values = [=](const plssvm::parameter &params_p, plssvm::aos_matrix<plssvm::real_type> &out_p, const plssvm::aos_matrix<plssvm::real_type> &weights_p, const std::vector<plssvm::real_type> &rho_p, const plssvm::soa_matrix<plssvm::real_type> &support_vectors_p, const plssvm::soa_matrix<plssvm::real_type> &predict_points_p) {
+            switch (kernel) {
+                case plssvm::kernel_function_type::linear:
+                    // unreachable
+                    break;
+                case plssvm::kernel_function_type::polynomial:
+                    plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::polynomial>(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, params_p.degree, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                    break;
+                case plssvm::kernel_function_type::rbf:
+                    plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::rbf>(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get<plssvm::real_type>(params_p.gamma));
+                    break;
+                case plssvm::kernel_function_type::sigmoid:
+                    plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::sigmoid>(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get<plssvm::real_type>(params_p.gamma), params_p.coef0);
+                    break;
+                case plssvm::kernel_function_type::laplacian:
+                    plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::laplacian>(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get<plssvm::real_type>(params_p.gamma));
+                    break;
+                case plssvm::kernel_function_type::chi_squared:
+                    plssvm::hpx::detail::device_kernel_predict<plssvm::kernel_function_type::chi_squared>(out_p, weights_p, rho_p, support_vectors_p, predict_points_p, std::get<plssvm::real_type>(params_p.gamma));
+                    break;
+            }
+        };
+
+        // the number of classes must match
+        std::vector<plssvm::real_type> rho_wrong = util::generate_random_vector<plssvm::real_type>(weights.num_rows());
+        rho_wrong.pop_back();
+        EXPECT_DEATH(run_predict_values(params, out, weights, rho_wrong, data.data(), predict_points),
+                     ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", w.num_rows(), rho_wrong.size())));
+
+        // the number of support vectors and weights must match
+        const auto weights_wrong = util::generate_specific_matrix<plssvm::aos_matrix<plssvm::real_type>>(plssvm::shape{ 3, data.data().num_rows() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+        EXPECT_DEATH(run_predict_values(params, out, weights_wrong, rho, data.data(), predict_points),
+                     ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", weights_wrong.num_cols(), data.data().num_rows())));
+
+        // the number of features must match
+        const auto predict_points_wrong = util::generate_specific_matrix<plssvm::soa_matrix<plssvm::real_type>>(plssvm::shape{ data.data().num_rows(), data.data().num_cols() + 1 }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE });
+        EXPECT_DEATH(run_predict_values(params, out, weights, rho, data.data(), predict_points_wrong),
+                     ::testing::HasSubstr(fmt::format("Size mismatch: {} vs {}!", data.data().num_cols(), predict_points_wrong.num_cols())));
+
+        // the output shape must match
+        plssvm::aos_matrix<plssvm::real_type> out_wrong{};
+        EXPECT_DEATH(run_predict_values(params, out_wrong, weights, rho, data.data(), predict_points),
+                     ::testing::HasSubstr(fmt::format("Shape mismatch: [0, 0] vs {}!", (plssvm::shape{ predict_points.num_rows(), w.num_rows() }))));
+    }
+}
diff --git a/tests/backends/HPX/mock_hpx_csvm.hpp b/tests/backends/HPX/mock_hpx_csvm.hpp
new file mode 100644
index 000000000..e6263393d
--- /dev/null
+++ b/tests/backends/HPX/mock_hpx_csvm.hpp
@@ -0,0 +1,48 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief MOCK class for the C-SVM class using the HPX backend.
+ */
+
+#ifndef PLSSVM_TESTS_BACKENDS_HPX_MOCK_HPX_CSVM_HPP_
+#define PLSSVM_TESTS_BACKENDS_HPX_MOCK_HPX_CSVM_HPP_
+#pragma once
+
+#include "plssvm/backends/HPX/csvm.hpp"  // plssvm::hpx::csvm
+
+/**
+ * @brief GTest mock class for the HPX CSVM.
+ */
+class mock_hpx_csvm final : public plssvm::hpx::csvm {
+    using base_type = plssvm::hpx::csvm;
+
+  public:
+    template <typename... Args>
+    explicit mock_hpx_csvm(Args &&...args) :
+        base_type{ std::forward<Args>(args)... } { }
+
+    // make protected member functions public
+    using base_type::assemble_kernel_matrix;
+    using base_type::blas_level_3;
+    using base_type::get_device_memory;
+    using base_type::num_available_devices;
+
+    using base_type::predict_values;
+
+    using base_type::conjugate_gradients;
+    using base_type::perform_dimensional_reduction;
+    using base_type::run_blas_level_3;
+    using base_type::solve_lssvm_system_of_linear_equations;
+
+    using base_type::get_max_mem_alloc_size;
+
+    using base_type::data_distribution_;
+};
+
+#endif  // PLSSVM_TESTS_BACKENDS_HPX_MOCK_HPX_CSVM_HPP_
diff --git a/tests/backends/generic_csvm_tests.hpp b/tests/backends/generic_csvm_tests.hpp
index 562785728..dec8a7ad6 100644
--- a/tests/backends/generic_csvm_tests.hpp
+++ b/tests/backends/generic_csvm_tests.hpp
@@ -2,6 +2,7 @@
  * @file
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -85,8 +86,8 @@ template <typename csvm_type, typename device_ptr_type, typename matrix_type, ty
         return partial_kernel_matrix;
     };
 
-    if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
-        // only a single device for OpenMP on the CPU
+    if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
+        // only a single device for OpenMP, stdpar, and HPX on the CPU
         result[0] = plssvm::detail::move_only_any{ calculate_partial_kernel_matrix(0, matr.num_rows()) };
     } else {
         for (std::size_t device_id = 0; device_id < csvm.num_available_devices(); ++device_id) {
@@ -131,8 +132,8 @@ template <typename csvm_type, typename device_ptr_type, typename matrix_type, ty
     matr = matrix_type{ matr, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } };
 
     for (std::size_t device_id = 0; device_id < csvm.num_available_devices(); ++device_id) {
-        // created matrix is different for the OpenMP backend and the GPU backends!
-        if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
+        // created matrix is different for the OpenMP, stdpar or HPX backend and the GPU backends!
+        if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
             // only a single device ever in use
             result[0] = plssvm::detail::move_only_any{ std::make_tuple(plssvm::soa_matrix<real_type>{ matr, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }, std::forward<Args>(args)...) };
         } else {
@@ -277,7 +278,7 @@ TYPED_TEST_P(GenericCSVM, num_available_devices) {
     const csvm_type svm = util::construct_from_tuple<csvm_type>(csvm_test_type::additional_arguments);
 
     // the maximum memory allocation size should be greater than 0!
-    if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
+    if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
         EXPECT_EQ(svm.num_available_devices(), 1);
     } else {
         EXPECT_GE(svm.num_available_devices(), 1);
@@ -920,7 +921,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix_minimal) {
 
                 // get result based on used backend
                 std::vector<plssvm::real_type> kernel_matrix{};
-                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
+                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
                     kernel_matrix = plssvm::detail::move_only_any_cast<std::vector<plssvm::real_type>>(kernel_matrix_d[device_id]);  // std::vector
                 } else {
                     const auto &kernel_matrix_d_ptr = plssvm::detail::move_only_any_cast<const device_ptr_type &>(kernel_matrix_d[device_id]);  // device_ptr -> convert it to a std::vector
@@ -947,8 +948,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix_minimal) {
                 EXPECT_TRUE(kernel_matrix_d[device_id].has_value());
 
                 // implicit doesn't assemble a kernel matrix!
-                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
-                    const auto &[data_d_ret, params_ret, q_red_ret, QA_cost_ret] = plssvm::detail::move_only_any_cast<const std::tuple<plssvm::soa_matrix<plssvm::real_type>, plssvm::parameter, std::vector<plssvm::real_type>, plssvm::real_type> &>(kernel_matrix_d[device_id]);
+                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {                    const auto &[data_d_ret, params_ret, q_red_ret, QA_cost_ret] = plssvm::detail::move_only_any_cast<const std::tuple<plssvm::soa_matrix<plssvm::real_type>, plssvm::parameter, std::vector<plssvm::real_type>, plssvm::real_type> &>(kernel_matrix_d[device_id]);
 
                     // the values should not have changed! (except the matrix layout)
                     EXPECT_EQ(params_ret, params);
@@ -1030,7 +1030,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix) {
 
                 // get result based on used backend
                 std::vector<plssvm::real_type> kernel_matrix{};
-                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
+                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
                     kernel_matrix = plssvm::detail::move_only_any_cast<std::vector<plssvm::real_type>>(kernel_matrix_d[device_id]);  // std::vector
                 } else {
                     const auto &kernel_matrix_d_ptr = plssvm::detail::move_only_any_cast<const device_ptr_type &>(kernel_matrix_d[device_id]);  // device_ptr -> convert it to a std::vector
@@ -1057,7 +1057,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix) {
                 EXPECT_TRUE(kernel_matrix_d[device_id].has_value());
 
                 // implicit doesn't assemble a kernel matrix!
-                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar) {
+                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
                     const auto &[data_d_ret, params_ret, q_red_ret, QA_cost_ret] = plssvm::detail::move_only_any_cast<const std::tuple<plssvm::soa_matrix<plssvm::real_type>, plssvm::parameter, std::vector<plssvm::real_type>, plssvm::real_type> &>(kernel_matrix_d[device_id]);
 
                     // the values should not have changed! (except the matrix layout)
diff --git a/tests/main.cpp b/tests/main.cpp
index 0623c7a26..d9673a11d 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -15,6 +15,11 @@
 
 #include <cstdlib>  // std::atexit
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+#include <hpx/hpx_main.hpp>
+//#include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
+//#include <hpx/execution.hpp>                                    // hpx::post
+#endif
 // silence GTest warnings/test errors
 
 // generic CSVM tests

From 7838cc4db5adc26774e16a5bcc43d53d533964ee Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 15/48] Wrap HPX runtime functions in backend

---
 include/plssvm/backends/HPX/detail/utility.hpp | 10 ++++++++++
 src/main_predict.cpp                           | 12 +++---------
 src/main_train.cpp                             | 12 +++---------
 src/plssvm/backends/HPX/detail/utility.cpp     | 16 +++++++++++++++-
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index 120a8a4de..a5a60d76a 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -34,6 +34,16 @@ using boost::atomic_ref;
  */
 [[nodiscard]] std::string get_hpx_version();
 
+/**
+ * @brief Start the runtime of the HPX backend.
+ */
+void start_hpx_runtime();
+
+/**
+ * @brief Stop the runtime of the HPX backend.
+ */
+void stop_hpx_runtime();
+
 }  // namespace plssvm::hpx::detail
 
 #endif  // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 6edcbb145..65c8a7876 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -21,8 +21,7 @@
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
-    #include <hpx/execution.hpp>                                    // hpx::post
+    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::start_hpx_runtime, plssvm::hpx::detail::stop_hpx_runtime
 #endif
 #include "fmt/format.h"  // fmt::print
 #include "fmt/os.h"      // fmt::ostream, fmt::output_file
@@ -74,9 +73,7 @@ int main(int argc, char *argv[]) {
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
         if (use_hpx_as_backend){
-            // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
-            // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
-            hpx::start(nullptr, 0, nullptr);
+            plssvm::hpx::detail::start_hpx_runtime();
         }
 #endif
         // create data set
@@ -181,10 +178,7 @@ int main(int argc, char *argv[]) {
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         if (use_hpx_as_backend){
-            // Finalize all existing HPX tasks
-            hpx::post([]{hpx::finalize();});
-            // Stop HPX runtime
-            hpx::stop();
+            plssvm::hpx::detail::stop_hpx_runtime();
         }
 #endif
     } catch (const plssvm::exception &e) {
diff --git a/src/main_train.cpp b/src/main_train.cpp
index d7c8ce105..b471fec17 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -20,8 +20,7 @@
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
-    #include <hpx/execution.hpp>                                    // hpx::post
+    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::start_hpx_runtime, plssvm::hpx::detail::stop_hpx_runtime
 #endif
 #include <algorithm>    // std::for_each
 #include <chrono>       // std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace
@@ -71,9 +70,7 @@ int main(int argc, char *argv[]) {
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
         if (use_hpx_as_backend){
-            // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
-            // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
-            hpx::start(nullptr, 0, nullptr);
+            plssvm::hpx::detail::start_hpx_runtime();
         }
 #endif
         // create data set
@@ -138,10 +135,7 @@ int main(int argc, char *argv[]) {
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         if (use_hpx_as_backend){
-            // Finalize all existing HPX tasks
-            hpx::post([]{hpx::finalize();});
-            // Stop HPX runtime
-            hpx::stop();
+            plssvm::hpx::detail::stop_hpx_runtime();
         }
 #endif
     } catch (const plssvm::exception &e) {
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index c6cdcb970..681f95fc1 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -7,6 +7,8 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 #include <hpx/runtime_distributed.hpp>
+#include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
+#include <hpx/execution.hpp>                                    // hpx::post
 #include "plssvm/backends/HPX/detail/utility.hpp"
 
 #include "plssvm/detail/string_utility.hpp"  // plssvm::detail::as_lower_case
@@ -24,9 +26,21 @@ std::string get_hpx_version() {
     return "unknown";
 }
 
-// TODO: implement function
 int get_num_threads() {
     // get the number of used HPX threads
     return static_cast<int>(::hpx::get_num_worker_threads());
 }
+
+void start_hpx_runtime() {
+    // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
+    // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
+    ::hpx::start(nullptr, 0, nullptr);
+}
+
+void stop_hpx_runtime() {
+   // Finalize all existing HPX tasks
+   ::hpx::post([]{::hpx::finalize();});
+   // Stop HPX runtime
+   ::hpx::stop();
+}
 }  // namespace plssvm::hpx::detail

From 65f37ae462127da7143355f732c5552ac6d3a0bd Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 16/48] Add HPX tests in more files

---
 tests/backend_types.cpp | 6 ++++++
 tests/csvm.cpp          | 8 ++++++++
 tests/csvm_factory.cpp  | 5 ++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/backend_types.cpp b/tests/backend_types.cpp
index 9975fbbfc..331ab11a3 100644
--- a/tests/backend_types.cpp
+++ b/tests/backend_types.cpp
@@ -1,6 +1,7 @@
 /**
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -34,6 +35,7 @@ TEST(BackendType, to_string) {
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::automatic, "automatic");
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::openmp, "openmp");
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::stdpar, "stdpar");
+    EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::hpx, "hpx");
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::cuda, "cuda");
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::hip, "hip");
     EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::opencl, "opencl");
@@ -54,6 +56,8 @@ TEST(BackendType, from_string) {
     EXPECT_CONVERSION_FROM_STRING("AUTO", plssvm::backend_type::automatic);
     EXPECT_CONVERSION_FROM_STRING("openmp", plssvm::backend_type::openmp);
     EXPECT_CONVERSION_FROM_STRING("OpenMP", plssvm::backend_type::openmp);
+    EXPECT_CONVERSION_FROM_STRING("hpx", plssvm::backend_type::hpx);
+    EXPECT_CONVERSION_FROM_STRING("HPX", plssvm::backend_type::hpx);
     EXPECT_CONVERSION_FROM_STRING("stdpar", plssvm::backend_type::stdpar);
     EXPECT_CONVERSION_FROM_STRING("STDPAR", plssvm::backend_type::stdpar);
     EXPECT_CONVERSION_FROM_STRING("cuda", plssvm::backend_type::cuda);
@@ -138,6 +142,7 @@ TEST(BackendType, csvm_to_backend_type) {
     // test the type_trait
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::openmp::csvm>::value, plssvm::backend_type::openmp);
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::stdpar::csvm>::value, plssvm::backend_type::stdpar);
+    EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::hpx::csvm>::value, plssvm::backend_type::hpx);
     EXPECT_EQ(plssvm::csvm_to_backend_type<const plssvm::cuda::csvm>::value, plssvm::backend_type::cuda);
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::hip::csvm &>::value, plssvm::backend_type::hip);
     EXPECT_EQ(plssvm::csvm_to_backend_type<const plssvm::opencl::csvm &>::value, plssvm::backend_type::opencl);
@@ -153,6 +158,7 @@ TEST(BackendType, csvm_to_backend_type_v) {
     // test the type_trait
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::openmp::csvm>, plssvm::backend_type::openmp);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::stdpar::csvm>, plssvm::backend_type::stdpar);
+    EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::hpx::csvm>, plssvm::backend_type::hpx);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<const plssvm::cuda::csvm>, plssvm::backend_type::cuda);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::hip::csvm &>, plssvm::backend_type::hip);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<const plssvm::opencl::csvm &>, plssvm::backend_type::opencl);
diff --git a/tests/csvm.cpp b/tests/csvm.cpp
index c1ec5b47b..9934eaef3 100644
--- a/tests/csvm.cpp
+++ b/tests/csvm.cpp
@@ -198,6 +198,14 @@ TEST(BaseCSVM, csvm_backend_exists) {
     EXPECT_FALSE(plssvm::csvm_backend_exists<plssvm::openmp::csvm>::value);
 #endif
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    EXPECT_TRUE(plssvm::csvm_backend_exists_v<plssvm::hpx::csvm>);
+    EXPECT_TRUE(plssvm::csvm_backend_exists<plssvm::hpx::csvm>::value);
+#else
+    EXPECT_FALSE(plssvm::csvm_backend_exists_v<plssvm::hpx::csvm>);
+    EXPECT_FALSE(plssvm::csvm_backend_exists<plssvm::hpx::csvm>::value);
+#endif
+
 #if defined(PLSSVM_HAS_CUDA_BACKEND)
     EXPECT_TRUE(plssvm::csvm_backend_exists_v<plssvm::cuda::csvm>);
     EXPECT_TRUE(plssvm::csvm_backend_exists<plssvm::cuda::csvm>::value);
diff --git a/tests/csvm_factory.cpp b/tests/csvm_factory.cpp
index cb06f6b68..8daba37d0 100644
--- a/tests/csvm_factory.cpp
+++ b/tests/csvm_factory.cpp
@@ -30,7 +30,7 @@
 
 namespace util {
 
-using csvm_types = std::tuple<plssvm::openmp::csvm, plssvm::stdpar::csvm, plssvm::cuda::csvm, plssvm::hip::csvm, plssvm::opencl::csvm, plssvm::sycl::csvm>;
+using csvm_types = std::tuple<plssvm::openmp::csvm, plssvm::hpx::csvm, plssvm::stdpar::csvm, plssvm::cuda::csvm, plssvm::hip::csvm, plssvm::opencl::csvm, plssvm::sycl::csvm>;
 using csvm_types_gtest = util::combine_test_parameters_gtest_t<util::cartesian_type_product_t<csvm_types>>;
 /// A type list of all supported SYCL C-SVMs.
 using sycl_csvm_types = std::tuple<plssvm::sycl::csvm, plssvm::adaptivecpp::csvm, plssvm::dpcpp::csvm>;
@@ -43,6 +43,9 @@ namespace testing::internal {  // dirty hack to have type names for incomplete t
 template <>
 std::string GetTypeName<util::test_parameter<util::type_list<plssvm::openmp::csvm>, util::value_list<>>>() { return "openmp_csvm"; }
 
+template <>
+std::string GetTypeName<util::test_parameter<util::type_list<plssvm::hpx::csvm>, util::value_list<>>>() { return "hpx_csvm"; }
+
 template <>
 std::string GetTypeName<util::test_parameter<util::type_list<plssvm::stdpar::csvm>, util::value_list<>>>() { return "stdpar_csvm"; }
 

From 7fa0b09ee15f0c9e73b441e1a29789c5f1bffc35 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 17/48] Add HPX get version function

---
 include/plssvm/backends/HPX/detail/utility.hpp |  2 +-
 src/plssvm/backends/HPX/CMakeLists.txt         |  2 +-
 src/plssvm/backends/HPX/detail/utility.cpp     | 16 +++++-----------
 tests/backends/HPX/detail/utility.cpp          |  2 --
 tests/backends/generic_csvm_tests.hpp          |  3 ++-
 tests/main.cpp                                 |  2 --
 6 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index a5a60d76a..0904bc5e3 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -14,7 +14,7 @@
 #define PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
 #pragma once
 
-    #include "boost/atomic/atomic_ref.hpp"  // boost::atomic_ref
+#include "boost/atomic/atomic_ref.hpp"  // boost::atomic_ref
 
 #include <string>  // std::string
 
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index 16b3691e5..6f3585e08 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -40,7 +40,7 @@ set(PLSSVM_HPX_SOURCES
 # set target properties
 set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
 add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
-target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main HPX::iostreams_component)
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main)
 
 # additional compilation flags
 target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index 681f95fc1..717de95d9 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -6,24 +6,18 @@
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
  */
-#include <hpx/runtime_distributed.hpp>
-#include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
-#include <hpx/execution.hpp>                                    // hpx::post
+#include <hpx/hpx_start.hpp>                 // hpx::{start, stop, finalize}
+#include <hpx/execution.hpp>                 // hpx::post
+#include <hpx/runtime_distributed.hpp>       // ::hpx::get_num_worker_threads
+#include <hpx/version.hpp>                   // ::hpx::full_version_as_string
 #include "plssvm/backends/HPX/detail/utility.hpp"
 
-#include "plssvm/detail/string_utility.hpp"  // plssvm::detail::as_lower_case
-#include "plssvm/detail/utility.hpp"         // ::plssvm::detail::contains
-#include "plssvm/target_platforms.hpp"       // plssvm::target_platforms
-
-#include "fmt/format.h"  // fmt::format
-
 #include <string>  // std::string
 
 namespace plssvm::hpx::detail {
 
-// TODO: implement function
 std::string get_hpx_version() {
-    return "unknown";
+    return ::hpx::full_version_as_string();
 }
 
 int get_num_threads() {
diff --git a/tests/backends/HPX/detail/utility.cpp b/tests/backends/HPX/detail/utility.cpp
index 02e84d300..16647745a 100644
--- a/tests/backends/HPX/detail/utility.cpp
+++ b/tests/backends/HPX/detail/utility.cpp
@@ -13,11 +13,9 @@
 
 #include "gtest/gtest.h"  // TEST, EXPECT_EQ, EXPECT_NE, EXPECT_NO_THROW, EXPECT_FALSE
 
-#include <regex>   // std::regex, std::regex::extended, std::regex_match
 #include <string>  // std::string
 
 TEST(HPXUtility, get_num_threads) {
-    // Will fail but need to think about a way so set correct result
     EXPECT_GT(plssvm::hpx::detail::get_num_threads(), 0);
 }
 
diff --git a/tests/backends/generic_csvm_tests.hpp b/tests/backends/generic_csvm_tests.hpp
index dec8a7ad6..58201bdee 100644
--- a/tests/backends/generic_csvm_tests.hpp
+++ b/tests/backends/generic_csvm_tests.hpp
@@ -948,7 +948,8 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix_minimal) {
                 EXPECT_TRUE(kernel_matrix_d[device_id].has_value());
 
                 // implicit doesn't assemble a kernel matrix!
-                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {                    const auto &[data_d_ret, params_ret, q_red_ret, QA_cost_ret] = plssvm::detail::move_only_any_cast<const std::tuple<plssvm::soa_matrix<plssvm::real_type>, plssvm::parameter, std::vector<plssvm::real_type>, plssvm::real_type> &>(kernel_matrix_d[device_id]);
+                if constexpr (plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::openmp || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::stdpar || plssvm::csvm_to_backend_type_v<csvm_type> == plssvm::backend_type::hpx) {
+                    const auto &[data_d_ret, params_ret, q_red_ret, QA_cost_ret] = plssvm::detail::move_only_any_cast<const std::tuple<plssvm::soa_matrix<plssvm::real_type>, plssvm::parameter, std::vector<plssvm::real_type>, plssvm::real_type> &>(kernel_matrix_d[device_id]);
 
                     // the values should not have changed! (except the matrix layout)
                     EXPECT_EQ(params_ret, params);
diff --git a/tests/main.cpp b/tests/main.cpp
index d9673a11d..22ea4689c 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -17,8 +17,6 @@
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
 #include <hpx/hpx_main.hpp>
-//#include <hpx/hpx_start.hpp>                                    // hpx::{start, stop, finalize}
-//#include <hpx/execution.hpp>                                    // hpx::post
 #endif
 // silence GTest warnings/test errors
 

From 18389a3012222f587262082d813e1cf63c7ec69d Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 18/48] Increment number of backends to account for HPX

---
 tests/backend_types.cpp | 2 +-
 tests/csvm_factory.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/backend_types.cpp b/tests/backend_types.cpp
index 331ab11a3..fb9d3a565 100644
--- a/tests/backend_types.cpp
+++ b/tests/backend_types.cpp
@@ -44,7 +44,7 @@ TEST(BackendType, to_string) {
 
 TEST(BackendType, to_string_unknown) {
     // check conversions to std::string from unknown backend_type
-    EXPECT_CONVERSION_TO_STRING(static_cast<plssvm::backend_type>(7), "unknown");
+    EXPECT_CONVERSION_TO_STRING(static_cast<plssvm::backend_type>(8), "unknown");
 }
 
 // check whether the std::string -> plssvm::backend_type conversions are correct
diff --git a/tests/csvm_factory.cpp b/tests/csvm_factory.cpp
index 8daba37d0..12f3f3606 100644
--- a/tests/csvm_factory.cpp
+++ b/tests/csvm_factory.cpp
@@ -234,7 +234,7 @@ TEST(CSVMFactory, factory_named_parameter) {
 }
 
 TEST(CSVMFactory, invalid_backend) {
-    EXPECT_THROW_WHAT(std::ignore = plssvm::make_csvm(static_cast<plssvm::backend_type>(7)),
+    EXPECT_THROW_WHAT(std::ignore = plssvm::make_csvm(static_cast<plssvm::backend_type>(8)),
                       plssvm::unsupported_backend_exception,
                       "Unrecognized backend provided!");
 }

From 1532500371be9a9161bdb3908222236444f99970 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 19/48] Reformulate index computation and remove conversion
 warnings

---
 include/plssvm/backends/HPX/detail/utility.hpp      | 13 +++++++++++++
 .../kernel/cg_explicit/kernel_matrix_assembly.hpp   |  6 +++---
 .../cg_implicit/kernel_matrix_assembly_blas.hpp     |  4 ++--
 src/main_predict.cpp                                | 10 ++--------
 src/main_train.cpp                                  | 11 ++---------
 src/plssvm/backends/HPX/CMakeLists.txt              |  2 +-
 tests/main.cpp                                      |  8 +++++++-
 7 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index 0904bc5e3..d2d3be36f 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -44,6 +44,19 @@ void start_hpx_runtime();
  */
 void stop_hpx_runtime();
 
+/**
+ * @brief Scope Guard that leverages RAII to start and correctly teardown the HPX runtime even in case of exceptions.
+ */
+struct scope_guard
+{
+  scope_guard(){
+    start_hpx_runtime();
+  }
+  ~scope_guard()
+  {
+    stop_hpx_runtime();
+  }
+};
 }  // namespace plssvm::hpx::detail
 
 #endif  // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index f94ff1c1a..f73570c0f 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -61,9 +61,9 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
 
  ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const std::size_t col = static_cast<std::size_t>(0.5 * (2 * blocked_dept + 1 - std::sqrt(4 * blocked_dept * blocked_dept + 4 * blocked_dept - 8 * idx + 1)));
-        const std::size_t row = static_cast<std::size_t>(idx - (col * blocked_dept - 0.5 * col * col - 0.5 * col));
-       
+        const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+        const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
+  
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 5c1cd6d3f..0b41909ce 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -75,8 +75,8 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector
 
     ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
-        const std::size_t col = static_cast<std::size_t>(0.5 * (2 * blocked_dept + 1 - std::sqrt(4 * blocked_dept * blocked_dept + 4 * blocked_dept - 8 * idx + 1)));
-        const std::size_t row = static_cast<std::size_t>(idx - (col * blocked_dept - 0.5 * col * col - 0.5 * col));
+        const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
+        const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
 
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 65c8a7876..931f41a77 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -21,7 +21,7 @@
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::start_hpx_runtime, plssvm::hpx::detail::stop_hpx_runtime
+    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
 #endif
 #include "fmt/format.h"  // fmt::print
 #include "fmt/os.h"      // fmt::ostream, fmt::output_file
@@ -73,7 +73,7 @@ int main(int argc, char *argv[]) {
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
         if (use_hpx_as_backend){
-            plssvm::hpx::detail::start_hpx_runtime();
+            hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
         }
 #endif
         // create data set
@@ -175,12 +175,6 @@ int main(int argc, char *argv[]) {
                             plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time) });
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
-
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-        if (use_hpx_as_backend){
-            plssvm::hpx::detail::stop_hpx_runtime();
-        }
-#endif
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
diff --git a/src/main_train.cpp b/src/main_train.cpp
index b471fec17..44aad5a21 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -20,7 +20,7 @@
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::start_hpx_runtime, plssvm::hpx::detail::stop_hpx_runtime
+    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
 #endif
 #include <algorithm>    // std::for_each
 #include <chrono>       // std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace
@@ -51,7 +51,6 @@ int main(int argc, char *argv[]) {
         hws::system_hardware_sampler sampler{ PLSSVM_HARDWARE_SAMPLING_INTERVAL };
         sampler.start_sampling();
 #endif
-
         // parse SVM parameter from command line
         plssvm::detail::cmd::parser_train cmd_parser{ argc, argv };
 
@@ -70,7 +69,7 @@ int main(int argc, char *argv[]) {
 #if defined(PLSSVM_HAS_HPX_BACKEND)
         const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
         if (use_hpx_as_backend){
-            plssvm::hpx::detail::start_hpx_runtime();
+            hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
         }
 #endif
         // create data set
@@ -132,12 +131,6 @@ int main(int argc, char *argv[]) {
                             plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time) });
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
-
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-        if (use_hpx_as_backend){
-            plssvm::hpx::detail::stop_hpx_runtime();
-        }
-#endif
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index 6f3585e08..951722bd6 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -40,7 +40,7 @@ set(PLSSVM_HPX_SOURCES
 # set target properties
 set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
 add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
-target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main)
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx)
 
 # additional compilation flags
 target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
diff --git a/tests/main.cpp b/tests/main.cpp
index 22ea4689c..992a0be31 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -16,7 +16,7 @@
 #include <cstdlib>  // std::atexit
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-#include <hpx/hpx_main.hpp>
+    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
 #endif
 // silence GTest warnings/test errors
 
@@ -66,5 +66,11 @@ int main(int argc, char **argv) {
 #if !defined(_WIN32)
     ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
 #endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    // create std::unique_ptr containing a plssvm::hpx::detail::scope_guard
+    // -> used to automatically handle HPX runtime initialization and finalization
+    std::unique_ptr<plssvm::hpx::detail::scope_guard> hpx_guard{};
+    hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
+#endif 
     return RUN_ALL_TESTS();
 }

From 2c05c50b530d6cf8f43fd561a5ae2389cf4194c8 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:20 +0100
Subject: [PATCH 20/48] Add HPX string to parser doc and tests

---
 include/plssvm/detail/cmd/parser_predict.hpp | 2 +-
 include/plssvm/detail/cmd/parser_train.hpp   | 2 +-
 tests/detail/cmd/parser_predict.cpp          | 2 +-
 tests/detail/cmd/parser_train.cpp            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp
index 2a114f0f0..2b96416ae 100644
--- a/include/plssvm/detail/cmd/parser_predict.hpp
+++ b/include/plssvm/detail/cmd/parser_predict.hpp
@@ -37,7 +37,7 @@ struct parser_predict {
      */
     parser_predict(int argc, char **argv);
 
-    /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+    /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
     backend_type backend{ backend_type::automatic };
     /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
     target_platform target{ target_platform::automatic };
diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp
index 70f0c03e2..a723fa82e 100644
--- a/include/plssvm/detail/cmd/parser_train.hpp
+++ b/include/plssvm/detail/cmd/parser_train.hpp
@@ -53,7 +53,7 @@ struct parser_train {
     /// The multi-class classification strategy used.
     classification_type classification{ classification_type::oaa };
 
-    /// The used backend: automatic (depending on the specified target_platforms), OpenMP, stdpar, CUDA, HIP, OpenCL, or SYCL.
+    /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, or SYCL.
     backend_type backend{ backend_type::automatic };
     /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel.
     target_platform target{ target_platform::automatic };
diff --git a/tests/detail/cmd/parser_predict.cpp b/tests/detail/cmd/parser_predict.cpp
index 8a04c3b1d..0a4da53d9 100644
--- a/tests/detail/cmd/parser_predict.cpp
+++ b/tests/detail/cmd/parser_predict.cpp
@@ -168,7 +168,7 @@ TEST_P(ParserPredictBackend, parsing) {
 // clang-format off
 INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictBackend, ::testing::Combine(
                 ::testing::Values("-b", "--backend"),
-                ::testing::Values("automatic", "OpenMP", "CUDA", "HIP", "OpenCL", "SYCL")),
+                ::testing::Values("automatic", "OpenMP", "HPX", "CUDA", "HIP", "OpenCL", "SYCL")),
                 naming::pretty_print_parameter_flag_and_value<ParserPredictBackend>);
 // clang-format on
 
diff --git a/tests/detail/cmd/parser_train.cpp b/tests/detail/cmd/parser_train.cpp
index ba1392d75..ae1eafbaf 100644
--- a/tests/detail/cmd/parser_train.cpp
+++ b/tests/detail/cmd/parser_train.cpp
@@ -443,7 +443,7 @@ TEST_P(ParserTrainBackend, parsing) {
 // clang-format off
 INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainBackend, ::testing::Combine(
                 ::testing::Values("-b", "--backend"),
-                ::testing::Values("automatic", "OpenMP", "CUDA", "HIP", "OpenCL", "SYCL")),
+                ::testing::Values("automatic", "OpenMP", "HPX", "CUDA", "HIP", "OpenCL", "SYCL")),
                 naming::pretty_print_parameter_flag_and_value<ParserTrainBackend>);
 // clang-format on
 

From 2e7a0b3773bf65dd93165f029dac344b0985a230 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 21/48] Add HPX backend existence at various places

---
 .clang-format           |  2 +-
 README.md               |  6 ++++
 docs/resources/dirs.dox | 62 ++++++++++++++++++++++++++++++++++++++++-
 include/plssvm/core.hpp |  6 ++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/.clang-format b/.clang-format
index 30a5ef1db..a057d0bef 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ IncludeBlocks: Regroup
 IncludeCategories:
   - Regex: '^"plssvm/'
     Priority: 1
-  - Regex: '^"(cuda|hip|CL|sycl|omp)'
+  - Regex: '^"(cuda|hip|CL|sycl|omp|hpx)'
     Priority: 2
   - Regex: '^"(tests|bindings)/'
     Priority: 3
diff --git a/README.md b/README.md
index 566ac248a..425fe478e 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ The main highlights of our SVM implementations are:
 1. Drop-in replacement for LIBSVM's `svm-train`, `svm-predict`, and `svm-scale` (some features currently not implemented).
 2. Support of multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to target GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel:
    - [OpenMP](https://www.openmp.org/)
+   - [HPX](https://hpx.stellar-group.org/)
    - [stdpar](https://en.cppreference.com/w/cpp/algorithm) (supported implementations are [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK, [roc-stdpar](https://github.com/ROCm/roc-stdpar) as a patched LLVM, [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler, [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/) using TBB). <br>
      **Note**: due to the nature of the used USM mechanics in the `stdpar` implementations, the `stdpar` backend **can't** be enabled together with **any** other backend! <br>
      **Note**: since every translation units need to be compiled with the same flag, we currently globally set `CMAKE_CXX_FLAGS` although it's discouraged in favor of `target_compile_options`.
@@ -105,6 +106,10 @@ Additional dependencies for the stdpar backend:
 
 - compiler with stdpar support
 
+Additional dependencies for the HPX backend:
+
+- [HPX ≥ v1.9](https://hpx.stellar-group.org/)
+
 Additional dependencies for the CUDA backend:
 
 - CUDA SDK
@@ -596,6 +601,7 @@ Note that during CMake configuration it is guaranteed that at least one of the a
 The `--target_platform=automatic` option works for the different backends as follows:
 
 - `OpenMP`: always selects a CPU
+- `HPX`: always selects a CPU
 - `CUDA`: always selects an NVIDIA GPU (if no NVIDIA GPU is available, throws an exception)
 - `HIP`: always selects an AMD GPU (if no AMD GPU is available, throws an exception)
 - `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU
diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox
index 8c3119aab..671970e41 100644
--- a/docs/resources/dirs.dox
+++ b/docs/resources/dirs.dox
@@ -329,6 +329,66 @@
  * @brief Directory containing kernel implementations for the implicit CG algorithm using the stdpar backend.
  */
 
+ /**
+ * @dir include/plssvm/backends/hpx
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing the implementation for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/hpx/detail
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @authir Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing implementation details for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/hpx/kernel
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing all kernels for the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/hpx/kernel/cg_explicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the explicit CG algorithm using the HPX backend.
+ */
+
+/**
+ * @dir include/plssvm/backends/hpx/kernel/cg_implicit
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack 
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Directory containing kernel implementations for the implicit CG algorithm using the HPX backend.
+ */
+
 /**
  * @dir include/plssvm/backends/SYCL
  * @author Alexander Van Craen
@@ -504,4 +564,4 @@
  *          See the LICENSE.md file in the project root for full license information.
  *
  * @brief Directory containing compile-time constant meta data for git specific information.
- */
\ No newline at end of file
+ */
diff --git a/include/plssvm/core.hpp b/include/plssvm/core.hpp
index 4311a189f..4e1fd1be1 100644
--- a/include/plssvm/core.hpp
+++ b/include/plssvm/core.hpp
@@ -76,6 +76,12 @@ namespace plssvm::openmp { }
 /// Namespace containing OpenMP backend specific implementation details. **Should not** directly be used by users.
 namespace plssvm::openmp::detail { }
 
+/// Namespace containing the C-SVM using the HPX backend.
+namespace plssvm::hpx { }
+
+/// Namespace containing HPX backend specific implementation details. **Should not** directly be used by users.
+namespace plssvm::hpx::detail { }
+
 /// Namespace containing the C-SVM using the stdpar backend.
 namespace plssvm::stdpar { }
 

From 927c6195ddc926c6324c1997b945367f3feb1ba5 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 22/48] Add Python bindings for HPX backend

---
 bindings/Python/CMakeLists.txt        |  7 +++-
 bindings/Python/README.md             |  8 ++--
 bindings/Python/backend_types.cpp     |  2 +
 bindings/Python/backends/hpx_csvm.cpp | 57 +++++++++++++++++++++++++++
 bindings/Python/main.cpp              |  5 +++
 5 files changed, 73 insertions(+), 6 deletions(-)
 create mode 100644 bindings/Python/backends/hpx_csvm.cpp

diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt
index 5bead042a..f951f77a4 100644
--- a/bindings/Python/CMakeLists.txt
+++ b/bindings/Python/CMakeLists.txt
@@ -1,4 +1,4 @@
-## Authors: Alexander Van Craen, Marcel Breyer
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
 ## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
 ## License: This file is part of the PLSSVM project which is released under the MIT license.
 ##          See the LICENSE.md file in the project root for full license information.
@@ -68,6 +68,9 @@ endif ()
 if (TARGET ${PLSSVM_OPENMP_BACKEND_LIBRARY_NAME})
     list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/openmp_csvm.cpp)
 endif ()
+if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME})
+    list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/hpx_csvm.cpp)
+endif ()
 if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME})
     
     # AdaptiveCpp stdpar only support on the CPU when using our Python bindings
@@ -125,4 +128,4 @@ target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$<COMPILE_LANG_AND_I
 target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC -fPIC)
 
 # append pybind11 bindings library to installed targets
-append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME})
\ No newline at end of file
+append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME})
diff --git a/bindings/Python/README.md b/bindings/Python/README.md
index 40648cca7..cc21104f5 100644
--- a/bindings/Python/README.md
+++ b/bindings/Python/README.md
@@ -10,7 +10,7 @@
         - [plssvm.Parameter](#plssvmparameter)
         - [plssvm.DataSet](#plssvmdataset)
         - [plssvm.CSVM](#plssvmcsvm)
-        - [plssvm.openmp.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM](#plssvmopenmpcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm)
+        - [plssvm.openmp.CSVM, plssvm.hpx.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM](#plssvmopenmpcsvm-plssvmhpxcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm)
         - [plssvm.Model](#plssvmmodel)
         - [plssvm.Version](#plssvmversion)
         - [plssvm.environment.ScopeGuard](#plssvmenvironmentscopeguard)
@@ -196,7 +196,7 @@ The following table lists all PLSSVM enumerations exposed on the Python side:
 | `FileFormatType`       | `LIBSVM`, `ARFF`                                                     | The different supported file format types (default: `LIBSVM`).                                                                                                                                                                                              |
 | `GammaCoefficientType` | `AUTOMATIC`, `SCALE`                                                 | The different modes for the dynamic gamma calculation (default: `AUTOMATIC`).                                                                                                                                                                               |
 | `ClassificationType`   | `OAA`, `OAO`                                                         | The different supported multi-class classification strategies (default: `LIBSVM`).                                                                                                                                                                          |
-| `BackendType`          | `AUTOMATIC`, `OPENMP`, `CUDA`, `HIP`, `OPENCL`, `SYCL`               | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform.                                                                                                              |
+| `BackendType`          | `AUTOMATIC`, `OPENMP`, `HPX`, `CUDA`, `HIP`, `OPENCL`, `SYCL`        | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform.                                                                                                              |
 | `VerbosityLevel`       | `QUIET`, `LIBSVM`, `TIMING`, `FULL`                                  | The different supported log levels (default: `FULL`). `QUIET` means no output, `LIBSVM` output that is as conformant as possible with LIBSVM's output, `TIMING` all timing related outputs, and `FULL` everything. Can be combined via bit-wise operations. |
 | `Status`               | `UNINITIALIZED`, `INITIALIZED`, `FINALIZED`, `UNNECESSARY`           | The different environment status values. **Note**: located in the `plssvm.environment` module.                                                                                                                                                              |                                                                                                                                                                                                                   |
 
@@ -349,7 +349,7 @@ and `sycl_kernel_invocation_type` to choose between the two different SYCL kerne
 | `score(model)`                                                                                                                               | Score the model with respect to itself returning its accuracy.                                                                                                                                                      |
 | `score(model, data_set)`                                                                                                                     | Score the model given the provided data set returning its accuracy.                                                                                                                                                 |
 
-#### `plssvm.openmp.CSVM`, `plssvm.stdpar.CSVM`, plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM`
+#### `plssvm.openmp.CSVM`, `plssvm.hpx.CSVM`, `plssvm.stdpar.CSVM`, plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM`
 
 These classes represent the backend specific CSVMs.
 **Note**: they are only available if the respective backend has been enabled during PLSSVM's build step.
@@ -560,4 +560,4 @@ The PLSSVM Python3 bindings define a few new exception types:
 | `ClassificationReportError`  | If something in the classification report went wrong. **Note**: shouldn't occur in user code.                          |
 | `EnvironmentError`           | If something during environment initialization or finalization went wrong.                                             |
 
-Depending on the available backends, additional `BackendError`s are also available (e.g., `plssvm.cuda.BackendError`).
\ No newline at end of file
+Depending on the available backends, additional `BackendError`s are also available (e.g., `plssvm.cuda.BackendError`).
diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp
index 8a1fa29fb..f88f8c2e2 100644
--- a/bindings/Python/backend_types.cpp
+++ b/bindings/Python/backend_types.cpp
@@ -1,6 +1,7 @@
 /**
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -20,6 +21,7 @@ void init_backend_types(py::module_ &m) {
     py::enum_<plssvm::backend_type>(m, "BackendType")
         .value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform")
         .value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)")
+        .value("HPX", plssvm::backend_type::hpx, "HPX to target CPUs only (currently no GPU executor support)")
         .value("STDPAR", plssvm::backend_type::stdpar, "C++ standard parallelism to target CPUs and GPUs from different vendors based on the used stdpar implementation; supported implementations are: nvhpc (nvc++), roc-stdpar, AdaptiveCpp, Intel LLVM (icpx), and GNU GCC + TBB")
         .value("CUDA", plssvm::backend_type::cuda, "CUDA to target NVIDIA GPUs only")
         .value("HIP", plssvm::backend_type::hip, "HIP to target AMD and NVIDIA GPUs")
diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp
new file mode 100644
index 000000000..165cda836
--- /dev/null
+++ b/bindings/Python/backends/hpx_csvm.cpp
@@ -0,0 +1,57 @@
+/**
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @author Alexander Strack
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "plssvm/backends/HPX/csvm.hpp"        // plssvm::hpx::csvm
+#include "plssvm/backends/HPX/exceptions.hpp"  // plssvm::hpx::backend_exception
+#include "plssvm/csvm.hpp"                        // plssvm::csvm
+#include "plssvm/exceptions/exceptions.hpp"       // plssvm::exception
+#include "plssvm/parameter.hpp"                   // plssvm::parameter
+#include "plssvm/target_platforms.hpp"            // plssvm::target_platform
+
+#include "bindings/Python/utility.hpp"  // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception
+
+#include "pybind11/pybind11.h"  // py::module_, py::class_, py::init
+#include "pybind11/stl.h"       // support for STL types
+
+#include <memory>  // std::make_unique
+
+namespace py = pybind11;
+
+void init_hpx_csvm(py::module_ &m, const py::exception<plssvm::exception> &base_exception) {
+    // use its own submodule for the HPX CSVM bindings
+    py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality");
+
+    // bind the CSVM using the HPX backend
+    py::class_<plssvm::hpx::csvm, plssvm::csvm>(hpx_module, "CSVM")
+        .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object")
+        .def(py::init<plssvm::parameter>(), "create an SVM with the automatic target platform and provided parameter object")
+        .def(py::init<plssvm::target_platform>(), "create an SVM with the provided target platform and default parameter object")
+        .def(py::init<plssvm::target_platform, plssvm::parameter>(), "create an SVM with the provided target platform and parameter object")
+        .def(py::init([](const py::kwargs &args) {
+                 // check for valid keys
+                 check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+                 // if one of the value keyword parameter is provided, set the respective value
+                 const plssvm::parameter params = convert_kwargs_to_parameter(args);
+                 // create CSVM with the default target platform
+                 return std::make_unique<plssvm::hpx::csvm>(params);
+             }),
+             "create an SVM with the default target platform and keyword arguments")
+        .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) {
+                 // check for valid keys
+                 check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" });
+                 // if one of the value keyword parameter is provided, set the respective value
+                 const plssvm::parameter params = convert_kwargs_to_parameter(args);
+                 // create CSVM with the provided target platform
+                 return std::make_unique<plssvm::hpx::csvm>(target, params);
+             }),
+             "create an SVM with the provided target platform and keyword arguments");
+
+    // register HPX backend specific exceptions
+    register_py_exception<plssvm::hpx::backend_exception>(hpx_module, "BackendError", base_exception);
+}
diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp
index f37bc20db..170afa2c3 100644
--- a/bindings/Python/main.cpp
+++ b/bindings/Python/main.cpp
@@ -1,6 +1,7 @@
 /**
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -35,6 +36,7 @@ void init_environment(py::module_ &);
 void init_exceptions(py::module_ &, const py::exception<plssvm::exception> &);
 void init_csvm(py::module_ &);
 void init_openmp_csvm(py::module_ &, const py::exception<plssvm::exception> &);
+void init_hpx_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_stdpar_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_cuda_csvm(py::module_ &, const py::exception<plssvm::exception> &);
 void init_hip_csvm(py::module_ &, const py::exception<plssvm::exception> &);
@@ -86,6 +88,9 @@ PYBIND11_MODULE(plssvm, m) {
 #if defined(PLSSVM_HAS_OPENMP_BACKEND)
     init_openmp_csvm(m, base_exception);
 #endif
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    init_hpx_csvm(m, base_exception);
+#endif
 #if defined(PLSSVM_HAS_STDPAR_BACKEND)
     init_stdpar_csvm(m, base_exception);
 #endif

From 27b0d3f9a5232c46a64dcfed56e8c921e48818e7 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 23/48] Fix rebase error

---
 src/main_train.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main_train.cpp b/src/main_train.cpp
index 44aad5a21..d072633d8 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -117,6 +117,7 @@ int main(int argc, char *argv[]) {
                 PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY(*s);
             });
 #endif
+        };
         std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser));
 
         // stop CPU hardware sampler and dump results if available

From 170c73d135b54bfc8d110222d8f935f5f2b8d748 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 24/48] Add HPX cmake present and components

---
 CMakeLists.txt                         |   1 +
 CMakePresets.json                      |   3 +-
 README.md                              |   3 +
 cmake/plssvm/plssvmHPXTargets.cmake    |  21 ++++
 cmake/plssvm/plssvmOpenMPTargets.cmake |   4 +-
 cmake/presets/all.json                 |   7 +-
 cmake/presets/common.json              |   3 +-
 cmake/presets/hpx.json                 | 143 +++++++++++++++++++++++++
 8 files changed, 179 insertions(+), 6 deletions(-)
 create mode 100644 cmake/plssvm/plssvmHPXTargets.cmake
 create mode 100644 cmake/presets/hpx.json

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 099dac055..962af95cb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -858,6 +858,7 @@ install(FILES
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHIPTargets.cmake"
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenCLTargets.cmake"
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmOpenMPTargets.cmake"
+        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmHPXTargets.cmake"
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmAdaptiveCppTargets.cmake"
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmDPCPPTargets.cmake"
         "${CMAKE_CURRENT_SOURCE_DIR}/cmake/plssvm/plssvmstdparTargets.cmake"
diff --git a/CMakePresets.json b/CMakePresets.json
index 8e4925dd0..c6bf7373f 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -2,6 +2,7 @@
   "version": 6,
   "include": [
     "cmake/presets/openmp.json",
+    "cmake/presets/hpx.json",
     "cmake/presets/stdpar.json",
     "cmake/presets/stdpar_gcc.json",
     "cmake/presets/stdpar_nvhpc.json",
@@ -15,4 +16,4 @@
     "cmake/presets/dpcpp.json",
     "cmake/presets/all.json"
   ]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index 425fe478e..ed3065678 100644
--- a/README.md
+++ b/README.md
@@ -360,6 +360,9 @@ Available configure presets:
   "openmp"                  - OpenMP backend
   "openmp_python"           - OpenMP backend + Python bindings
   "openmp_test"             - OpenMP backend tests
+  "hpx"                     - HPX backend
+  "hpx_python"              - HPX backend + Python bindings
+  "hpx_test"                - HPX backend tests
   "cuda"                    - CUDA backend
   "cuda_python"             - CUDA backend + Python bindings
   "cuda_test"               - CUDA backend tests
diff --git a/cmake/plssvm/plssvmHPXTargets.cmake b/cmake/plssvm/plssvmHPXTargets.cmake
new file mode 100644
index 000000000..8fa711790
--- /dev/null
+++ b/cmake/plssvm/plssvmHPXTargets.cmake
@@ -0,0 +1,21 @@
+## Authors: Alexander Van Craen, Marcel Breyer, Alexander Strack
+## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved
+## License: This file is part of the PLSSVM project which is released under the MIT license.
+##          See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+include(CMakeFindDependencyMacro)
+
+# check if the HPX backend is available
+if (TARGET plssvm::plssvm-HPX)
+    # enable HPX
+    find_dependency(HPX)
+    # set alias targets
+    add_library(plssvm::HPX ALIAS plssvm::plssvm-HPX)
+    add_library(plssvm::hpx ALIAS plssvm::plssvm-HPX)
+    # set COMPONENT to be found
+    set(plssvm_HPX_FOUND ON)
+else ()
+    # set COMPONENT to be NOT found
+    set(plssvm_HPX_FOUND OFF)
+endif ()
diff --git a/cmake/plssvm/plssvmOpenMPTargets.cmake b/cmake/plssvm/plssvmOpenMPTargets.cmake
index db95e1d1e..d8a6951f2 100644
--- a/cmake/plssvm/plssvmOpenMPTargets.cmake
+++ b/cmake/plssvm/plssvmOpenMPTargets.cmake
@@ -6,7 +6,7 @@
 
 include(CMakeFindDependencyMacro)
 
-# check if the OpenCL backend is available
+# check if the OpenMP backend is available
 if (TARGET plssvm::plssvm-OpenMP)
     # enable OpenMP
     find_dependency(OpenMP)
@@ -18,4 +18,4 @@ if (TARGET plssvm::plssvm-OpenMP)
 else ()
     # set COMPONENT to be NOT found
     set(plssvm_OpenMP_FOUND OFF)
-endif ()
\ No newline at end of file
+endif ()
diff --git a/cmake/presets/all.json b/cmake/presets/all.json
index 76528069b..a1db4d1bc 100644
--- a/cmake/presets/all.json
+++ b/cmake/presets/all.json
@@ -9,6 +9,7 @@
       "cacheVariables": {
         "CMAKE_CXX_COMPILER": "clang++",
         "PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+        "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
         "PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
         "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
         "PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -23,6 +24,7 @@
       "cacheVariables": {
         "CMAKE_CXX_COMPILER": "clang++",
         "PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+        "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
         "PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
         "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
         "PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -39,6 +41,7 @@
       "cacheVariables": {
         "CMAKE_CXX_COMPILER": "clang++",
         "PLSSVM_ENABLE_OPENMP_BACKEND": "AUTO",
+        "PLSSVM_ENABLE_HPX_BACKEND": "AUTO",
         "PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
         "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO",
         "PLSSVM_ENABLE_HIP_BACKEND": "AUTO",
@@ -84,7 +87,7 @@
       "inherits": "common",
       "filter": {
         "include": {
-          "name": "OpenMP.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
+          "name": "OpenMP.*|HPX.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*"
         }
       }
     }
@@ -155,4 +158,4 @@
       ]
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/common.json b/cmake/presets/common.json
index fac5b4b22..68da8cd61 100644
--- a/cmake/presets/common.json
+++ b/cmake/presets/common.json
@@ -12,6 +12,7 @@
       "binaryDir": "build/${presetName}",
       "cacheVariables": {
         "PLSSVM_ENABLE_OPENMP_BACKEND": "OFF",
+        "PLSSVM_ENABLE_HPX_BACKEND": "OFF",
         "PLSSVM_ENABLE_STDPAR_BACKEND": "OFF",
         "PLSSVM_ENABLE_CUDA_BACKEND": "OFF",
         "PLSSVM_ENABLE_HIP_BACKEND": "OFF",
@@ -66,4 +67,4 @@
       }
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/cmake/presets/hpx.json b/cmake/presets/hpx.json
new file mode 100644
index 000000000..8ca724653
--- /dev/null
+++ b/cmake/presets/hpx.json
@@ -0,0 +1,143 @@
+{
+  "version": 6,
+  "include": ["common.json"],
+  "configurePresets": [
+    {
+      "name": "hpx",
+      "displayName": "HPX backend",
+      "inherits": "build",
+      "cacheVariables": {
+        "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu"
+      }
+    },
+    {
+      "name": "hpx_python",
+      "displayName": "HPX backend + Python bindings",
+      "inherits": "build",
+      "cacheVariables": {
+        "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu",
+        "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
+        "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
+      }
+    },
+    {
+      "name": "hpx_test",
+      "displayName": "HPX backend tests",
+      "inherits": "test",
+      "cacheVariables": {
+        "PLSSVM_ENABLE_HPX_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu"
+      }
+    }
+  ],
+  "buildPresets": [
+    {
+      "name": "hpx",
+      "displayName": "HPX backend",
+      "configurePreset": "hpx",
+      "configuration": "RelWithDebInfo",
+      "inherits": "common"
+    },
+    {
+      "name": "hpx_python",
+      "displayName": "HPX backend + Python bindings",
+      "configurePreset": "hpx_python",
+      "configuration": "RelWithDebInfo",
+      "inherits": "common"
+    },
+    {
+      "name": "hpx_test",
+      "displayName": "HPX backend tests",
+      "configurePreset": "hpx_test",
+      "configuration": "Debug",
+      "inherits": "common"
+    }
+  ],
+  "testPresets": [
+    {
+      "name": "hpx_test",
+      "displayName": "HPX backend all tests",
+      "configurePreset": "hpx_test",
+      "inherits": "common"
+    },
+    {
+      "name": "hpx_backend_test",
+      "displayName": "HPX backend specific tests",
+      "configurePreset": "hpx_test",
+      "inherits": "common",
+      "filter": {
+        "include": {
+          "name": "HPX.*"
+        }
+      }
+    }
+  ],
+  "workflowPresets": [
+    {
+      "name": "hpx",
+      "displayName": "HPX backend workflow",
+      "steps": [
+        {
+          "name": "hpx",
+          "type": "configure"
+        },
+        {
+          "name": "hpx",
+          "type": "build"
+        }
+      ]
+    },
+    {
+      "name": "hpx_python",
+      "displayName": "HPX backend + Python bindings workflow",
+      "steps": [
+        {
+          "name": "hpx_python",
+          "type": "configure"
+        },
+        {
+          "name": "hpx_python",
+          "type": "build"
+        }
+      ]
+    },
+    {
+      "name": "hpx_test",
+      "displayName": "HPX test workflow",
+      "steps": [
+        {
+          "name": "hpx_test",
+          "type": "configure"
+        },
+        {
+          "name": "hpx_test",
+          "type": "build"
+        },
+        {
+          "name": "hpx_test",
+          "type": "test"
+        }
+      ]
+    },
+    {
+      "name": "hpx_backend_test",
+      "displayName": "HPX backend test workflow",
+      "steps": [
+        {
+          "name": "hpx_test",
+          "type": "configure"
+        },
+        {
+          "name": "hpx_test",
+          "type": "build"
+        },
+        {
+          "name": "hpx_backend_test",
+          "type": "test"
+        }
+      ]
+    }
+  ]
+}

From fbb763473bbefe01fed0a54a8b3bc3c0ab32c78f Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 25/48] Set target platform for OpenMP preset

---
 cmake/presets/openmp.json | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cmake/presets/openmp.json b/cmake/presets/openmp.json
index c4dd27a4d..1031d50d0 100644
--- a/cmake/presets/openmp.json
+++ b/cmake/presets/openmp.json
@@ -7,7 +7,8 @@
       "displayName": "OpenMP backend",
       "inherits": "build",
       "cacheVariables": {
-        "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+        "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu"
       }
     },
     {
@@ -16,6 +17,7 @@
       "inherits": "build",
       "cacheVariables": {
         "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu",
         "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON",
         "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON"
       }
@@ -25,7 +27,8 @@
       "displayName": "OpenMP backend tests",
       "inherits": "test",
       "cacheVariables": {
-        "PLSSVM_ENABLE_OPENMP_BACKEND": "ON"
+        "PLSSVM_ENABLE_OPENMP_BACKEND": "ON",
+        "PLSSVM_TARGET_PLATFORMS": "cpu"
       }
     }
   ],
@@ -137,4 +140,4 @@
       ]
     }
   ]
-}
\ No newline at end of file
+}

From 444afa129f0aa5d8f01a1c29d5552cdf6cd1ed16 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 26/48] Concretize the minimum required HPX version

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ed3065678..3224c9868 100644
--- a/README.md
+++ b/README.md
@@ -108,7 +108,7 @@ Additional dependencies for the stdpar backend:
 
 Additional dependencies for the HPX backend:
 
-- [HPX ≥ v1.9](https://hpx.stellar-group.org/)
+- [HPX ≥ v1.9.0](https://hpx.stellar-group.org/)
 
 Additional dependencies for the CUDA backend:
 

From 958bfcecebad93ff147114872f560b8b8b4c0689 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 27/48] Fix wrong path to HPX backend

---
 docs/resources/dirs.dox | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox
index 671970e41..84e561a46 100644
--- a/docs/resources/dirs.dox
+++ b/docs/resources/dirs.dox
@@ -330,7 +330,7 @@
  */
 
  /**
- * @dir include/plssvm/backends/hpx
+ * @dir include/plssvm/backends/HPX
  * @author Alexander Van Craen
  * @author Marcel Breyer
  * @author Alexander strack
@@ -342,7 +342,7 @@
  */
 
 /**
- * @dir include/plssvm/backends/hpx/detail
+ * @dir include/plssvm/backends/HPX/detail
  * @author Alexander Van Craen
  * @author Marcel Breyer
  * @authir Alexander Strack
@@ -354,7 +354,7 @@
  */
 
 /**
- * @dir include/plssvm/backends/hpx/kernel
+ * @dir include/plssvm/backends/HPX/kernel
  * @author Alexander Van Craen
  * @author Marcel Breyer
  * @author Alexander Strack
@@ -366,7 +366,7 @@
  */
 
 /**
- * @dir include/plssvm/backends/hpx/kernel/cg_explicit
+ * @dir include/plssvm/backends/HPX/kernel/cg_explicit
  * @author Alexander Van Craen
  * @author Marcel Breyer
  * @author Alexander Strack
@@ -378,7 +378,7 @@
  */
 
 /**
- * @dir include/plssvm/backends/hpx/kernel/cg_implicit
+ * @dir include/plssvm/backends/HPX/kernel/cg_implicit
  * @author Alexander Van Craen
  * @author Marcel Breyer
  * @author Alexander Strack 

From b2ef7e0a82f69af6c2b07b798b314c0420ef914f Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:21 +0100
Subject: [PATCH 28/48] Comment HPX Scope Guard struct

---
 include/plssvm/backends/HPX/detail/utility.hpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index d2d3be36f..697121b89 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -49,9 +49,16 @@ void stop_hpx_runtime();
  */
 struct scope_guard
 {
+  /**
+   * @brief Scope Guard constructor that starts the runtime of the HPX backend.
+   */
   scope_guard(){
     start_hpx_runtime();
   }
+
+  /**
+   * @brief Scope Guard destructor that stops the runtime of the HPX backend.
+   */
   ~scope_guard()
   {
     stop_hpx_runtime();

From 0196fabd408fed8843acbe9f6f951d6e2de2e661 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:22 +0100
Subject: [PATCH 29/48] Add HPX as supported component

---
 cmake/plssvm/plssvmConfig.cmake.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in
index e6be17d15..9636e125e 100644
--- a/cmake/plssvm/plssvmConfig.cmake.in
+++ b/cmake/plssvm/plssvmConfig.cmake.in
@@ -25,7 +25,7 @@ find_dependency(fmt REQUIRED)
 include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake")
 
 # list all available libraries
-set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
+set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;HPX;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar")
 set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}")
 
 # check which libraries are available

From ba965021df7f1e15f4e876391036c3ea171aa574 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 19:29:22 +0100
Subject: [PATCH 30/48] Add minimum HPX version

---
 src/main_predict.cpp                   | 9 ---------
 src/main_train.cpp                     | 9 ---------
 src/plssvm/backends/HPX/CMakeLists.txt | 2 +-
 tests/main.cpp                         | 9 ---------
 4 files changed, 1 insertion(+), 28 deletions(-)

diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 931f41a77..7f15377e6 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -20,9 +20,6 @@
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
-#endif
 #include "fmt/format.h"  // fmt::print
 #include "fmt/os.h"      // fmt::ostream, fmt::output_file
 #include "fmt/ranges.h"  // fmt::join
@@ -70,12 +67,6 @@ int main(int argc, char *argv[]) {
                             "\ntask: prediction\n{}\n",
                             plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser });
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-        const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
-        if (use_hpx_as_backend){
-            hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
-        }
-#endif
         // create data set
         const auto data_set_visitor = [&](auto &&data) {
             using label_type = typename std::remove_reference_t<decltype(data)>::label_type;
diff --git a/src/main_train.cpp b/src/main_train.cpp
index d072633d8..d11b49ea9 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -19,9 +19,6 @@
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
-#endif
 #include <algorithm>    // std::for_each
 #include <chrono>       // std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace
 #include <cstddef>      // std::size_t
@@ -66,12 +63,6 @@ int main(int argc, char *argv[]) {
                             "\ntask: training\n{}\n\n\n",
                             plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser });
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-        const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
-        if (use_hpx_as_backend){
-            hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
-        }
-#endif
         // create data set
         const auto data_set_visitor = [&](auto &&data) {
             using label_type = typename std::remove_reference_t<decltype(data)>::label_type;
diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index 951722bd6..8ebde5e46 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -9,7 +9,7 @@ list(APPEND CMAKE_MESSAGE_INDENT "HPX:  ")
 # check if HPX can be enabled
 message(CHECK_START "Checking for HPX backend")
 
-find_package(HPX)
+find_package(HPX 1.9.0)
 
 if (NOT HPX_FOUND)
     message(CHECK_FAIL "not found")
diff --git a/tests/main.cpp b/tests/main.cpp
index 992a0be31..0623c7a26 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -15,9 +15,6 @@
 
 #include <cstdlib>  // std::atexit
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "plssvm/backends/HPX/detail/utility.hpp"   // plssvm::hpx::detail::scope_guard
-#endif
 // silence GTest warnings/test errors
 
 // generic CSVM tests
@@ -66,11 +63,5 @@ int main(int argc, char **argv) {
 #if !defined(_WIN32)
     ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
 #endif
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    // create std::unique_ptr containing a plssvm::hpx::detail::scope_guard
-    // -> used to automatically handle HPX runtime initialization and finalization
-    std::unique_ptr<plssvm::hpx::detail::scope_guard> hpx_guard{};
-    hpx_guard = std::make_unique<plssvm::hpx::detail::scope_guard>();
-#endif 
     return RUN_ALL_TESTS();
 }

From 5210fee106260fb65338a699506e3a171a7e6945 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 13 Nov 2024 21:38:19 +0100
Subject: [PATCH 31/48] Add HPX runtime to environment header

---
 include/plssvm/environment.hpp             |  6 ++++++
 src/plssvm/backends/HPX/detail/utility.cpp | 16 +---------------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 692c362b5..324d6317d 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -30,6 +30,12 @@
 #include <string>   // std::string
 #include <vector>   // std::vector
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    #include "hpx/runtime.hpp"                  // ::hpx::{is_running, is_stopped}
+    #include <hpx/hpx_start.hpp>                // ::hpx::{start, stop, finalize}
+    #include <hpx/execution.hpp>                // ::hpx::post
+#endif
+
 namespace plssvm::environment {
 
 /**
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index 717de95d9..c36422661 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -6,8 +6,7 @@
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
  */
-#include <hpx/hpx_start.hpp>                 // hpx::{start, stop, finalize}
-#include <hpx/execution.hpp>                 // hpx::post
+
 #include <hpx/runtime_distributed.hpp>       // ::hpx::get_num_worker_threads
 #include <hpx/version.hpp>                   // ::hpx::full_version_as_string
 #include "plssvm/backends/HPX/detail/utility.hpp"
@@ -24,17 +23,4 @@ int get_num_threads() {
     // get the number of used HPX threads
     return static_cast<int>(::hpx::get_num_worker_threads());
 }
-
-void start_hpx_runtime() {
-    // Initialize HPX runtime, but do not run hpx_main and do not pass commandline arguments
-    // Set HPX commandline arguments with the HPX_COMMANDLINE_OPTIONS="" environment variable
-    ::hpx::start(nullptr, 0, nullptr);
-}
-
-void stop_hpx_runtime() {
-   // Finalize all existing HPX tasks
-   ::hpx::post([]{::hpx::finalize();});
-   // Stop HPX runtime
-   ::hpx::stop();
-}
 }  // namespace plssvm::hpx::detail

From d220f4d5225c709e12a42cacfb712a25851097e2 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 13:27:46 +0100
Subject: [PATCH 32/48] Remove HPX Scope Guard as now obsolete

---
 .../plssvm/backends/HPX/detail/utility.hpp    | 30 -------------------
 include/plssvm/environment.hpp                |  6 ++--
 2 files changed, 3 insertions(+), 33 deletions(-)

diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index 697121b89..4d7c412cf 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -34,36 +34,6 @@ using boost::atomic_ref;
  */
 [[nodiscard]] std::string get_hpx_version();
 
-/**
- * @brief Start the runtime of the HPX backend.
- */
-void start_hpx_runtime();
-
-/**
- * @brief Stop the runtime of the HPX backend.
- */
-void stop_hpx_runtime();
-
-/**
- * @brief Scope Guard that leverages RAII to start and correctly teardown the HPX runtime even in case of exceptions.
- */
-struct scope_guard
-{
-  /**
-   * @brief Scope Guard constructor that starts the runtime of the HPX backend.
-   */
-  scope_guard(){
-    start_hpx_runtime();
-  }
-
-  /**
-   * @brief Scope Guard destructor that stops the runtime of the HPX backend.
-   */
-  ~scope_guard()
-  {
-    stop_hpx_runtime();
-  }
-};
 }  // namespace plssvm::hpx::detail
 
 #endif  // PLSSVM_BACKENDS_HPX_DETAIL_UTILITY_HPP_
diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 324d6317d..0931b7f3b 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -31,9 +31,9 @@
 #include <vector>   // std::vector
 
 #if defined(PLSSVM_HAS_HPX_BACKEND)
-    #include "hpx/runtime.hpp"                  // ::hpx::{is_running, is_stopped}
-    #include <hpx/hpx_start.hpp>                // ::hpx::{start, stop, finalize}
-    #include <hpx/execution.hpp>                // ::hpx::post
+    #include <hpx/execution.hpp>  // ::hpx::post
+    #include <hpx/hpx_start.hpp>  // ::hpx::{start, stop, finalize}
+    #include <hpx/runtime.hpp>    // ::hpx::{is_running, is_stopped}
 #endif
 
 namespace plssvm::environment {

From 02b400ef11e6e36e705bfa6432f2f526466ecf85 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 13:28:35 +0100
Subject: [PATCH 33/48] Cleanup main_train.cpp after rebase error

---
 src/main_train.cpp | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/main_train.cpp b/src/main_train.cpp
index d11b49ea9..7f99409c7 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -19,6 +19,7 @@
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
+
 #include <algorithm>    // std::for_each
 #include <chrono>       // std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace
 #include <cstddef>      // std::size_t
@@ -48,6 +49,7 @@ int main(int argc, char *argv[]) {
         hws::system_hardware_sampler sampler{ PLSSVM_HARDWARE_SAMPLING_INTERVAL };
         sampler.start_sampling();
 #endif
+
         // parse SVM parameter from command line
         plssvm::detail::cmd::parser_train cmd_parser{ argc, argv };
 
@@ -77,14 +79,6 @@ int main(int argc, char *argv[]) {
             const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type)
                                                                           : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params);
 
-#if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
-            // initialize hardware sampling
-            std::vector<std::unique_ptr<plssvm::detail::tracking::hardware_sampler>> sampler =
-                plssvm::detail::tracking::create_hardware_sampler(svm->get_target_platform(), svm->num_available_devices(), PLSSVM_HARDWARE_SAMPLING_INTERVAL);
-            // start sampling
-            std::for_each(sampler.begin(), sampler.end(), std::mem_fn(&plssvm::detail::tracking::hardware_sampler::start_sampling));
-#endif
-
             // only specify plssvm::max_iter if it isn't its default value
             const plssvm::model<label_type> model =
                 cmd_parser.max_iter == std::size_t{ 0 }
@@ -99,15 +93,6 @@ int main(int argc, char *argv[]) {
                                plssvm::solver = cmd_parser.solver);
             // save model to file
             model.save(cmd_parser.model_filename);
-
-#if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
-            // stop sampling
-            std::for_each(sampler.begin(), sampler.end(), std::mem_fn(&plssvm::detail::tracking::hardware_sampler::stop_sampling));
-            // write samples to yaml file
-            std::for_each(sampler.cbegin(), sampler.cend(), [&](const std::unique_ptr<plssvm::detail::tracking::hardware_sampler> &s) {
-                PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY(*s);
-            });
-#endif
         };
         std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser));
 
@@ -123,6 +108,7 @@ int main(int argc, char *argv[]) {
                             plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time) });
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
+
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;

From af4c5f67c802eed86eca5a2a4ca0943a77e01fcf Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 14:24:14 +0100
Subject: [PATCH 34/48] Add HPX backend to commandline output and set correct
 priority

---
 README.md                    | 4 ++--
 src/plssvm/backend_types.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3224c9868..394dd8e04 100644
--- a/README.md
+++ b/README.md
@@ -553,7 +553,7 @@ Usage:
   -i, --max_iter arg            set the maximum number of CG iterations (default: num_features)
   -l, --solver arg              choose the solver: automatic|cg_explicit|cg_implicit (default: automatic)
   -a, --classification arg      the classification strategy to use for multi-class classification: oaa|oao (default: oaa)
-  -b, --backend arg             choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic)
+  -b, --backend arg             choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|stdpar (default: automatic)
   -p, --target_platform arg     choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic)
       --sycl_kernel_invocation_type arg
                                 choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic)
@@ -597,7 +597,7 @@ The `--backend=automatic` option works as follows:
 - if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
 - otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar`
 - otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `stdpar`
-- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `stdpar`
+- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar`
 
 Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist.
 
diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp
index 56da55b8c..34789a764 100644
--- a/src/plssvm/backend_types.cpp
+++ b/src/plssvm/backend_types.cpp
@@ -65,7 +65,7 @@ backend_type determine_default_backend(const std::vector<backend_type> &availabl
         decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
         decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } },
         decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } },
-        decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar, backend_type::hpx } }
+        decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::hpx, backend_type::stdpar } }
     };
 
     // return the default backend based on the previously defined decision order

From fdbf38ccedbae2acb612c36ffa6056e35781dac3 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 14:40:12 +0100
Subject: [PATCH 35/48] Add note to manage HPX runtime in Python

---
 bindings/Python/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bindings/Python/README.md b/bindings/Python/README.md
index cc21104f5..afe9d6bb4 100644
--- a/bindings/Python/README.md
+++ b/bindings/Python/README.md
@@ -337,6 +337,10 @@ If the most performant backend should be used, it is sufficient to use `plssvm.C
 `sycl_implementation_type` to choose between DPC++ and AdaptiveCpp as SYCL implementations
 and `sycl_kernel_invocation_type` to choose between the two different SYCL kernel invocation types.
 
+**Note**: if the backend type is `plssvm.BackendType.HPX` it is necessary to initialize and finalize the HPX runtime.
+The runtime can be manually managed using `plssvm.environment.initialize()` and `plssvm.environment.finalize()`.
+We recommend utilizing `plssvm.environment.ScopeGuard()` to manage the lifetime of the HPX runtime automatically.
+
 | methods                                                                                                                                      | description                                                                                                                                                                                                         |
 |----------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `set_params(params)`                                                                                                                         | Replace the current `plssvm.Parameter` with the provided one.                                                                                                                                                       |

From 66ea031c1ceaafe0972d768df58902985d7f4ea7 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 14:41:42 +0100
Subject: [PATCH 36/48] Add HPX to backend vector if used

---
 src/main_predict.cpp | 11 ++++++++++-
 src/main_train.cpp   |  9 ++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 7f15377e6..2b9417f99 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -20,6 +20,7 @@
 #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED)
     #include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 #endif
+
 #include "fmt/format.h"  // fmt::print
 #include "fmt/os.h"      // fmt::ostream, fmt::output_file
 #include "fmt/ranges.h"  // fmt::join
@@ -74,8 +75,15 @@ int main(int argc, char *argv[]) {
             // check whether SYCL is used as backend (it is either requested directly or as automatic backend)
             const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
 
+            // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+            const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+            
             // initialize environments if necessary
-            environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+            std::vector<plssvm::backend_type> backends_to_initialize{};
+            if (use_hpx_as_backend) {
+                backends_to_initialize.push_back(plssvm::backend_type::hpx);
+            }
+            environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
 
             // create default csvm
             const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type)
@@ -166,6 +174,7 @@ int main(int argc, char *argv[]) {
                             plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time) });
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
+    
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 7f99409c7..295b2d4f5 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -72,8 +72,15 @@ int main(int argc, char *argv[]) {
             // check whether SYCL is used as backend (it is either requested directly or as automatic backend)
             const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) };
 
+            // check whether HPX is used as backend (it is either requested directly or as automatic backend)
+            const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
+            
             // initialize environments if necessary
-            environment_guard = std::make_unique<plssvm::environment::scope_guard>();
+            std::vector<plssvm::backend_type> backends_to_initialize{};
+            if (use_hpx_as_backend) {
+                backends_to_initialize.push_back(plssvm::backend_type::hpx);
+            }
+            environment_guard = std::make_unique<plssvm::environment::scope_guard>(backends_to_initialize);
 
             // create SVM
             const std::unique_ptr<plssvm::csvm> svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type)

From 148f0ba1a4c755132da914d7105a310ddff42cc0 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Thu, 14 Nov 2024 14:52:16 +0100
Subject: [PATCH 37/48] Minor clang-format changes

---
 src/main_predict.cpp | 4 ++--
 src/main_train.cpp   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main_predict.cpp b/src/main_predict.cpp
index 2b9417f99..ff28028c8 100644
--- a/src/main_predict.cpp
+++ b/src/main_predict.cpp
@@ -77,7 +77,7 @@ int main(int argc, char *argv[]) {
 
             // check whether HPX is used as backend (it is either requested directly or as automatic backend)
             const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
-            
+
             // initialize environments if necessary
             std::vector<plssvm::backend_type> backends_to_initialize{};
             if (use_hpx_as_backend) {
@@ -174,7 +174,7 @@ int main(int argc, char *argv[]) {
                             plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time) });
 
         PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename);
-    
+
     } catch (const plssvm::exception &e) {
         std::cerr << e.what_with_loc() << std::endl;
         return EXIT_FAILURE;
diff --git a/src/main_train.cpp b/src/main_train.cpp
index 295b2d4f5..32ac09d71 100644
--- a/src/main_train.cpp
+++ b/src/main_train.cpp
@@ -74,7 +74,7 @@ int main(int argc, char *argv[]) {
 
             // check whether HPX is used as backend (it is either requested directly or as automatic backend)
             const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) };
-            
+
             // initialize environments if necessary
             std::vector<plssvm::backend_type> backends_to_initialize{};
             if (use_hpx_as_backend) {

From 2024484573b5247eec0647c57bc5074b34aeb31c Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Mon, 18 Nov 2024 15:38:02 +0100
Subject: [PATCH 38/48] Add HPX runtime to rewritten environment header

---
 include/plssvm/environment.hpp | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 0931b7f3b..194cca9e5 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -106,7 +106,8 @@ namespace detail {
  * @return the respective environment status (`[[nodiscard]]`)
  */
 [[nodiscard]] inline status determine_status_from_initialized_finalized_flags(const bool is_initialized, const bool is_finalized) {
-    if (!is_initialized && !is_finalized) {
+    if (!is_initialized) {
+        // Note: ::hpx::is_stopped does return true even before calling finalize once
         return status::uninitialized;
     } else if (is_initialized && !is_finalized) {
         return status::initialized;
@@ -154,6 +155,14 @@ template <auto is_initialized_function, auto is_finalized_function>
         case backend_type::sycl:
             // no environment necessary to manage these backends
             return status::unnecessary;
+        case backend_type::hpx:
+            {
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+                return detail::determine_status_from_initialized_finalized_functions<::hpx::is_running, ::hpx::is_stopped>();
+#else
+                return status::unnecessary;
+#endif
+            }
     }
     // should never be reached!
     ::plssvm::detail::unreachable();
@@ -183,7 +192,12 @@ namespace detail {
 inline void initialize_backend([[maybe_unused]] const backend_type backend) {
     PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
     // Note: must be implemented for the backends that need environmental setup
-    // nothing to do for all available backends
+    // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    if (backend == backend_type::hpx) {
+        ::hpx::start(nullptr, 0, nullptr);
+    }
+#endif
 }
 
 /**
@@ -195,7 +209,12 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend) {
 inline void initialize_backend([[maybe_unused]] const backend_type backend, [[maybe_unused]] int &argc, [[maybe_unused]] char **argv) {
     PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be initialized!");
     // Note: must be implemented for the backends that need environmental setup
-    // nothing to do for all available backends
+    // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    if (backend == backend_type::hpx) {
+        ::hpx::start(nullptr, argc, argv);
+    }
+#endif
 }
 
 /**
@@ -205,7 +224,12 @@ inline void initialize_backend([[maybe_unused]] const backend_type backend, [[ma
 inline void finalize_backend([[maybe_unused]] const backend_type backend) {
     PLSSVM_ASSERT(backend != backend_type::automatic, "The automatic backend may never be finalized!");
     // Note: must be implemented for the backends that need environmental setup
-    // nothing to do for all available backends
+    // only have to perform special initialization steps for the HPX backend
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    if (backend == backend_type::hpx) {
+        ::hpx::post( []{::hpx::finalize();} );
+    }
+#endif
 }
 
 /**

From 4a3e3d520a23ffd4e538d6ef4de6c6b035a16bfc Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Mon, 18 Nov 2024 15:44:27 +0100
Subject: [PATCH 39/48] Major clang format changes after rebase

---
 bindings/Python/backends/hpx_csvm.cpp         |   8 +-
 include/plssvm/backends/HPX/csvm.hpp          |  20 +-
 .../plssvm/backends/HPX/detail/utility.hpp    |   3 +-
 .../backends/HPX/kernel/cg_explicit/blas.hpp  |  18 +-
 .../cg_explicit/kernel_matrix_assembly.hpp    |  28 +-
 .../kernel_matrix_assembly_blas.hpp           |  32 +--
 .../backends/HPX/kernel/predict_kernel.hpp    |  34 +--
 include/plssvm/environment.hpp                |   2 +-
 src/plssvm/backends/HPX/csvm.cpp              | 254 +++++++++---------
 src/plssvm/backends/HPX/detail/utility.cpp    |   6 +-
 tests/backends/HPX/hpx_csvm.cpp               |  40 +--
 11 files changed, 222 insertions(+), 223 deletions(-)

diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp
index 165cda836..92b4fef10 100644
--- a/bindings/Python/backends/hpx_csvm.cpp
+++ b/bindings/Python/backends/hpx_csvm.cpp
@@ -9,10 +9,10 @@
 
 #include "plssvm/backends/HPX/csvm.hpp"        // plssvm::hpx::csvm
 #include "plssvm/backends/HPX/exceptions.hpp"  // plssvm::hpx::backend_exception
-#include "plssvm/csvm.hpp"                        // plssvm::csvm
-#include "plssvm/exceptions/exceptions.hpp"       // plssvm::exception
-#include "plssvm/parameter.hpp"                   // plssvm::parameter
-#include "plssvm/target_platforms.hpp"            // plssvm::target_platform
+#include "plssvm/csvm.hpp"                     // plssvm::csvm
+#include "plssvm/exceptions/exceptions.hpp"    // plssvm::exception
+#include "plssvm/parameter.hpp"                // plssvm::parameter
+#include "plssvm/target_platforms.hpp"         // plssvm::target_platform
 
 #include "bindings/Python/utility.hpp"  // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception
 
diff --git a/include/plssvm/backends/HPX/csvm.hpp b/include/plssvm/backends/HPX/csvm.hpp
index 0be0dc86a..d9dba1e6e 100644
--- a/include/plssvm/backends/HPX/csvm.hpp
+++ b/include/plssvm/backends/HPX/csvm.hpp
@@ -14,15 +14,15 @@
 #define PLSSVM_BACKENDS_HPX_CSVM_HPP_
 #pragma once
 
-#include "plssvm/constants.hpp"                             // plssvm::real_type
-#include "plssvm/csvm.hpp"                                  // plssvm::csvm, plssvm::detail::csvm_backend_exists
-#include "plssvm/detail/memory_size.hpp"                    // plssvm::detail::memory_size
-#include "plssvm/detail/move_only_any.hpp"                  // plssvm::detail::move_only_any
-#include "plssvm/detail/type_traits.hpp"                    // PLSSVM_REQUIRES
-#include "plssvm/matrix.hpp"                                // plssvm::aos_matrix
-#include "plssvm/parameter.hpp"                             // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v
-#include "plssvm/solver_types.hpp"                          // plssvm::solver_type
-#include "plssvm/target_platforms.hpp"                      // plssvm::target_platform
+#include "plssvm/constants.hpp"             // plssvm::real_type
+#include "plssvm/csvm.hpp"                  // plssvm::csvm, plssvm::detail::csvm_backend_exists
+#include "plssvm/detail/memory_size.hpp"    // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp"  // plssvm::detail::move_only_any
+#include "plssvm/detail/type_traits.hpp"    // PLSSVM_REQUIRES
+#include "plssvm/matrix.hpp"                // plssvm::aos_matrix
+#include "plssvm/parameter.hpp"             // plssvm::parameter, plssvm::detail::has_only_parameter_named_args_v
+#include "plssvm/solver_types.hpp"          // plssvm::solver_type
+#include "plssvm/target_platforms.hpp"      // plssvm::target_platform
 
 #include <cstddef>      // std::size_t
 #include <type_traits>  // std::true_type
@@ -113,7 +113,7 @@ class csvm : public ::plssvm::csvm {
         return 1;
     }
 
-    protected:
+  protected:
     /**
      * @copydoc plssvm::csvm::get_device_memory
      */
diff --git a/include/plssvm/backends/HPX/detail/utility.hpp b/include/plssvm/backends/HPX/detail/utility.hpp
index 4d7c412cf..3fcdb04d0 100644
--- a/include/plssvm/backends/HPX/detail/utility.hpp
+++ b/include/plssvm/backends/HPX/detail/utility.hpp
@@ -15,8 +15,7 @@
 #pragma once
 
 #include "boost/atomic/atomic_ref.hpp"  // boost::atomic_ref
-
-#include <string>  // std::string
+#include <string>                       // std::string
 
 namespace plssvm::hpx::detail {
 
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
index 8bd36f102..09f6e6358 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/blas.hpp
@@ -19,13 +19,13 @@
 #include "plssvm/matrix.hpp"         // plssvm::soa_matrix
 #include "plssvm/shape.hpp"          // plssvm::shape
 
-#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
-#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
-#include <array>      // std::array
-#include <cmath>      // std::ceil
-#include <cstddef>    // std::size_t
-#include <numeric>    // std::iota
-#include <vector>     // std::vector
+#include <array>                                           // std::array
+#include <cmath>                                           // std::ceil
+#include <cstddef>                                         // std::size_t
+#include <hpx/execution.hpp>                               // hpx::execution::par_unseq
+#include <hpx/parallel/segmented_algorithms/for_each.hpp>  // hpx::for_each
+#include <numeric>                                         // std::iota
+#include <vector>                                          // std::vector
 
 namespace plssvm::hpx::detail {
 
@@ -51,9 +51,9 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
     // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
     const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
     const auto PADDING_SIZE_uz = static_cast<std::size_t>(PADDING_SIZE);
-    
+
     // define range over which should be iterated
-    std::vector<std::size_t> range(blocked_num_rhs * blocked_num_rows);   // define range over which should be iterated
+    std::vector<std::size_t> range(blocked_num_rhs * blocked_num_rows);  // define range over which should be iterated
     std::iota(range.begin(), range.end(), 0);
 
     ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
diff --git a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
index f73570c0f..2e59bf078 100644
--- a/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp
@@ -15,18 +15,18 @@
 #pragma once
 
 #include "plssvm/backends/HPX/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
-#include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
-#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
-#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
-#include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix
-
-#include <hpx/execution.hpp>                                // hpx::execution::par_unseq
-#include <hpx/parallel/segmented_algorithms/for_each.hpp>   // hpx::for_each
-#include <array>      // std::array
-#include <cmath>      // std::ceil, std::sqrt
-#include <cstddef>    // std::size_t
-#include <numeric>    // std::iota
-#include <vector>     // std::vector
+#include "plssvm/constants.hpp"                             // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp"                         // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp"                 // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                // plssvm::aos_matrix
+
+#include <array>                                           // std::array
+#include <cmath>                                           // std::ceil, std::sqrt
+#include <cstddef>                                         // std::size_t
+#include <hpx/execution.hpp>                               // hpx::execution::par_unseq
+#include <hpx/parallel/segmented_algorithms/for_each.hpp>  // hpx::for_each
+#include <numeric>                                         // std::iota
+#include <vector>                                          // std::vector
 
 namespace plssvm::hpx::detail {
 
@@ -59,11 +59,11 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty
     std::vector<std::size_t> range(blocked_dept * (blocked_dept + 1) / 2);
     std::iota(range.begin(), range.end(), 0);
 
- ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
+    ::hpx::for_each(::hpx::execution::par_unseq, range.begin(), range.end(), [&](const std::size_t idx) {
         // calculate the indices used in the current thread
         const std::size_t col = static_cast<std::size_t>(static_cast<double>(blocked_dept) + 0.5 - 0.5 * std::sqrt(4 * (blocked_dept * blocked_dept + blocked_dept - 2 * idx) + 1));
         const std::size_t row = static_cast<std::size_t>(0.5 * static_cast<double>(2 * (idx - col * blocked_dept) + col * col + col));
-  
+
         const std::size_t row_idx = row * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t col_idx = col * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
index 0b41909ce..eef6b809d 100644
--- a/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
+++ b/include/plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp
@@ -14,22 +14,22 @@
 #define PLSSVM_BACKENDS_HPX_KERNEL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_
 #pragma once
 
-#include "plssvm/backends/HPX/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
-#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
-#include "plssvm/constants.hpp"                                // plssvm::real_type
-#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
-#include "plssvm/detail/operators.hpp"                         // overloaded arithmetic operations for a plssvm::matrix
-#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
-#include "plssvm/kernel_functions.hpp"                         // plssvm::kernel_function
-#include "plssvm/matrix.hpp"                                   // aos_matrix
-
-#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
-#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
-#include <array>      // std::array
-#include <cmath>      // std::ceil
-#include <cstddef>    // std::size_t, std::sqrt
-#include <numeric>    // std::iota
-#include <vector>     // std::vector
+#include "plssvm/backends/HPX/detail/utility.hpp"           // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp"                             // plssvm::real_type
+#include "plssvm/detail/assert.hpp"                         // PLSSVM_ASSERT
+#include "plssvm/detail/operators.hpp"                      // overloaded arithmetic operations for a plssvm::matrix
+#include "plssvm/kernel_function_types.hpp"                 // plssvm::kernel_function_type
+#include "plssvm/kernel_functions.hpp"                      // plssvm::kernel_function
+#include "plssvm/matrix.hpp"                                // aos_matrix
+
+#include <array>                                           // std::array
+#include <cmath>                                           // std::ceil
+#include <cstddef>                                         // std::size_t, std::sqrt
+#include <hpx/execution.hpp>                               // hpx::execution::par_unseq
+#include <hpx/parallel/segmented_algorithms/for_each.hpp>  // hpx::for_each
+#include <numeric>                                         // std::iota
+#include <vector>                                          // std::vector
 
 namespace plssvm::hpx::detail {
 
diff --git a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
index 03a91dd9f..7b153d889 100644
--- a/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
+++ b/include/plssvm/backends/HPX/kernel/predict_kernel.hpp
@@ -14,21 +14,21 @@
 #define PLSSVM_BACKENDS_HPX_KERNEL_PREDICT_KERNEL_HPP_
 #pragma once
 
-#include "plssvm/backends/HPX/detail/utility.hpp"              // plssvm::hpx::detail::atomic_ref
-#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"     // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
-#include "plssvm/constants.hpp"                                // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
-#include "plssvm/detail/assert.hpp"                            // PLSSVM_ASSERT
-#include "plssvm/kernel_function_types.hpp"                    // plssvm::kernel_function_type
-#include "plssvm/matrix.hpp"                                   // plssvm::aos_matrix, plssvm::soa_matrix
-#include "plssvm/shape.hpp"                                    // plssvm::shape
-
-#include <hpx/execution.hpp>                              // hpx::execution::par_unseq
-#include <hpx/parallel/segmented_algorithms/for_each.hpp> // hpx::for_each
-#include <array>      // std::array
-#include <cmath>      // std::fma
-#include <cstddef>    // std::size_t
-#include <numeric>    // std::iota
-#include <vector>     // std::vector
+#include "plssvm/backends/HPX/detail/utility.hpp"           // plssvm::hpx::detail::atomic_ref
+#include "plssvm/backends/HPX/kernel/kernel_functions.hpp"  // plssvm::hpx::detail::{feature_reduce, apply_kernel_function}
+#include "plssvm/constants.hpp"                             // plssvm::{real_type, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE}
+#include "plssvm/detail/assert.hpp"                         // PLSSVM_ASSERT
+#include "plssvm/kernel_function_types.hpp"                 // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/shape.hpp"                                 // plssvm::shape
+
+#include <array>                                           // std::array
+#include <cmath>                                           // std::fma
+#include <cstddef>                                         // std::size_t
+#include <hpx/execution.hpp>                               // hpx::execution::par_unseq
+#include <hpx/parallel/segmented_algorithms/for_each.hpp>  // hpx::for_each
+#include <numeric>                                         // std::iota
+#include <vector>                                          // std::vector
 
 namespace plssvm::hpx::detail {
 
@@ -61,7 +61,7 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
         // calculate the indices used in the current thread
         const std::size_t feature = idx / blocked_num_classes;
         const std::size_t c = idx % blocked_num_classes;
-      
+
         const std::size_t feature_idx = feature * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
 
@@ -124,7 +124,7 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
         // calculate the indices used in the current thread
         const std::size_t pp = idx / blocked_num_classes;
         const std::size_t c = idx % blocked_num_classes;
-       
+
         const std::size_t pp_idx = pp * INTERNAL_BLOCK_SIZE_uz;
         const std::size_t class_idx = c * INTERNAL_BLOCK_SIZE_uz;
 
diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index 194cca9e5..ee23fb2fd 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -227,7 +227,7 @@ inline void finalize_backend([[maybe_unused]] const backend_type backend) {
     // only have to perform special initialization steps for the HPX backend
 #if defined(PLSSVM_HAS_HPX_BACKEND)
     if (backend == backend_type::hpx) {
-        ::hpx::post( []{::hpx::finalize();} );
+        ::hpx::post([] { ::hpx::finalize(); });
     }
 #endif
 }
diff --git a/src/plssvm/backends/HPX/csvm.cpp b/src/plssvm/backends/HPX/csvm.cpp
index b682f63a6..c6adff43d 100644
--- a/src/plssvm/backends/HPX/csvm.cpp
+++ b/src/plssvm/backends/HPX/csvm.cpp
@@ -9,24 +9,24 @@
 
 #include "plssvm/backends/HPX/csvm.hpp"
 
+#include "plssvm/backends/HPX/exceptions.hpp"                                      // plssvm::hpx::backend_exception
 #include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::detail::device_kernel_symm
 #include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::detail::device_kernel_assembly
 #include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::detail::device_kernel_assembly_symm
 #include "plssvm/backends/HPX/kernel/predict_kernel.hpp"                           // plssvm::hpx::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
-#include "plssvm/constants.hpp"                                                       // plssvm::real_type
-#include "plssvm/csvm.hpp"                                                            // plssvm::csvm
-#include "plssvm/detail/assert.hpp"                                                   // PLSSVM_ASSERT
-#include "plssvm/detail/data_distribution.hpp"                                        // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
-#include "plssvm/detail/memory_size.hpp"                                              // plssvm::detail::memory_size
-#include "plssvm/detail/move_only_any.hpp"                                            // plssvm::detail::{move_only_any, move_only_any_cast}
-#include "plssvm/detail/utility.hpp"                                                  // plssvm::detail::{get_system_memory, unreachable}
-#include "plssvm/backends/HPX/exceptions.hpp"                                         // plssvm::hpx::backend_exception
-#include "plssvm/kernel_function_types.hpp"                                           // plssvm::kernel_function_type
-#include "plssvm/matrix.hpp"                                                          // plssvm::aos_matrix, plssvm::soa_matrix
-#include "plssvm/parameter.hpp"                                                       // plssvm::parameter
-#include "plssvm/shape.hpp"                                                           // plssvm::shape
-#include "plssvm/solver_types.hpp"                                                    // plssvm::solver_type
-#include "plssvm/target_platforms.hpp"                                                // plssvm::target_platform
+#include "plssvm/constants.hpp"                                                    // plssvm::real_type
+#include "plssvm/csvm.hpp"                                                         // plssvm::csvm
+#include "plssvm/detail/assert.hpp"                                                // PLSSVM_ASSERT
+#include "plssvm/detail/data_distribution.hpp"                                     // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
+#include "plssvm/detail/memory_size.hpp"                                           // plssvm::detail::memory_size
+#include "plssvm/detail/move_only_any.hpp"                                         // plssvm::detail::{move_only_any, move_only_any_cast}
+#include "plssvm/detail/utility.hpp"                                               // plssvm::detail::{get_system_memory, unreachable}
+#include "plssvm/kernel_function_types.hpp"                                        // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                                       // plssvm::aos_matrix, plssvm::soa_matrix
+#include "plssvm/parameter.hpp"                                                    // plssvm::parameter
+#include "plssvm/shape.hpp"                                                        // plssvm::shape
+#include "plssvm/solver_types.hpp"                                                 // plssvm::solver_type
+#include "plssvm/target_platforms.hpp"                                             // plssvm::target_platform
 
 #include <cstddef>  // std::size_t
 #include <tuple>    // std::tuple, std::make_tuple
@@ -84,48 +84,48 @@ std::vector<::plssvm::detail::move_only_any> csvm::assemble_kernel_matrix(const
     PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
 
     std::vector<::plssvm::detail::move_only_any> kernel_matrices_parts(this->num_available_devices());
-    ::hpx::future<void> wait = ::hpx::async([&](){
-    const real_type cost = real_type{ 1.0 } / params.cost;
+    ::hpx::future<void> wait = ::hpx::async([&]() {
+        const real_type cost = real_type{ 1.0 } / params.cost;
 
-    switch (solver) {
-        case solver_type::automatic:
-            // unreachable
-            break;
-        case solver_type::cg_explicit:
-            {
-                const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() };
-                std::vector<real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0));  // only explicitly store the upper triangular matrix
-                switch (params.kernel_type) {
-                    case kernel_function_type::linear:
-                        detail::device_kernel_assembly<kernel_function_type::linear>(q_red, kernel_matrix, A, QA_cost, cost);
-                        break;
-                    case kernel_function_type::polynomial:
-                        detail::device_kernel_assembly<kernel_function_type::polynomial>(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get<real_type>(params.gamma), params.coef0);
-                        break;
-                    case kernel_function_type::rbf:
-                        detail::device_kernel_assembly<kernel_function_type::rbf>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
-                        break;
-                    case kernel_function_type::sigmoid:
-                        detail::device_kernel_assembly<kernel_function_type::sigmoid>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma), params.coef0);
-                        break;
-                    case kernel_function_type::laplacian:
-                        detail::device_kernel_assembly<kernel_function_type::laplacian>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
-                        break;
-                    case kernel_function_type::chi_squared:
-                        detail::device_kernel_assembly<kernel_function_type::chi_squared>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
-                        break;
-                }
+        switch (solver) {
+            case solver_type::automatic:
+                // unreachable
+                break;
+            case solver_type::cg_explicit:
+                {
+                    const plssvm::detail::triangular_data_distribution dist{ A.num_rows() - 1, this->num_available_devices() };
+                    std::vector<real_type> kernel_matrix(dist.calculate_explicit_kernel_matrix_num_entries_padded(0));  // only explicitly store the upper triangular matrix
+                    switch (params.kernel_type) {
+                        case kernel_function_type::linear:
+                            detail::device_kernel_assembly<kernel_function_type::linear>(q_red, kernel_matrix, A, QA_cost, cost);
+                            break;
+                        case kernel_function_type::polynomial:
+                            detail::device_kernel_assembly<kernel_function_type::polynomial>(q_red, kernel_matrix, A, QA_cost, cost, params.degree, std::get<real_type>(params.gamma), params.coef0);
+                            break;
+                        case kernel_function_type::rbf:
+                            detail::device_kernel_assembly<kernel_function_type::rbf>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                            break;
+                        case kernel_function_type::sigmoid:
+                            detail::device_kernel_assembly<kernel_function_type::sigmoid>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma), params.coef0);
+                            break;
+                        case kernel_function_type::laplacian:
+                            detail::device_kernel_assembly<kernel_function_type::laplacian>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                            break;
+                        case kernel_function_type::chi_squared:
+                            detail::device_kernel_assembly<kernel_function_type::chi_squared>(q_red, kernel_matrix, A, QA_cost, cost, std::get<real_type>(params.gamma));
+                            break;
+                    }
 
-                kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) };
-            }
-            break;
-        case solver_type::cg_implicit:
-            {
-                // simply return data since in implicit we don't assembly the kernel matrix here!
-                kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) };
-            }
-            break;
-    }
+                    kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::move(kernel_matrix) };
+                }
+                break;
+            case solver_type::cg_implicit:
+                {
+                    // simply return data since in implicit we don't assembly the kernel matrix here!
+                    kernel_matrices_parts[0] = ::plssvm::detail::move_only_any{ std::make_tuple(std::move(A), params, std::move(q_red), QA_cost) };
+                }
+                break;
+        }
     });
     // wait until operation is completed
     wait.get();
@@ -142,52 +142,52 @@ void csvm::blas_level_3(const solver_type solver, const real_type alpha, const s
     PLSSVM_ASSERT(B.shape() == C.shape(), "The B ({}) and C ({}) matrices must have the same shape!", B.shape(), C.shape());
     PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
 
-    ::hpx::future<void> wait = ::hpx::async([&](){
-    switch (solver) {
-        case solver_type::automatic:
-            // unreachable
-            break;
-        case solver_type::cg_explicit:
-            {
-                const std::size_t num_rhs = B.shape().x;
-                const std::size_t num_rows = B.shape().y;
+    ::hpx::future<void> wait = ::hpx::async([&]() {
+        switch (solver) {
+            case solver_type::automatic:
+                // unreachable
+                break;
+            case solver_type::cg_explicit:
+                {
+                    const std::size_t num_rhs = B.shape().x;
+                    const std::size_t num_rows = B.shape().y;
 
-                const auto &explicit_A = ::plssvm::detail::move_only_any_cast<const std::vector<real_type> &>(A.front());
-                PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!");
+                    const auto &explicit_A = ::plssvm::detail::move_only_any_cast<const std::vector<real_type> &>(A.front());
+                    PLSSVM_ASSERT(!explicit_A.empty(), "The A matrix must not be empty!");
 
-                detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C);
-            }
-            break;
-        case solver_type::cg_implicit:
-            {
-                const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast<const std::tuple<soa_matrix<real_type>, parameter, std::vector<real_type>, real_type> &>(A.front());
-                PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!");
-                PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
-                const real_type cost = real_type{ 1.0 } / params.cost;
+                    detail::device_kernel_symm(num_rows, num_rhs, alpha, explicit_A, B, beta, C);
+                }
+                break;
+            case solver_type::cg_implicit:
+                {
+                    const auto &[matr_A, params, q_red, QA_cost] = ::plssvm::detail::move_only_any_cast<const std::tuple<soa_matrix<real_type>, parameter, std::vector<real_type>, real_type> &>(A.front());
+                    PLSSVM_ASSERT(!matr_A.empty(), "The A matrix must not be empty!");
+                    PLSSVM_ASSERT(!q_red.empty(), "The q_red vector must not be empty!");
+                    const real_type cost = real_type{ 1.0 } / params.cost;
 
-                switch (params.kernel_type) {
-                    case kernel_function_type::linear:
-                        detail::device_kernel_assembly_symm<kernel_function_type::linear>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C);
-                        break;
-                    case kernel_function_type::polynomial:
-                        detail::device_kernel_assembly_symm<kernel_function_type::polynomial>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get<real_type>(params.gamma), params.coef0);
-                        break;
-                    case kernel_function_type::rbf:
-                        detail::device_kernel_assembly_symm<kernel_function_type::rbf>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
-                        break;
-                    case kernel_function_type::sigmoid:
-                        detail::device_kernel_assembly_symm<kernel_function_type::sigmoid>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma), params.coef0);
-                        break;
-                    case kernel_function_type::laplacian:
-                        detail::device_kernel_assembly_symm<kernel_function_type::laplacian>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
-                        break;
-                    case kernel_function_type::chi_squared:
-                        detail::device_kernel_assembly_symm<kernel_function_type::chi_squared>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
-                        break;
+                    switch (params.kernel_type) {
+                        case kernel_function_type::linear:
+                            detail::device_kernel_assembly_symm<kernel_function_type::linear>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C);
+                            break;
+                        case kernel_function_type::polynomial:
+                            detail::device_kernel_assembly_symm<kernel_function_type::polynomial>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, params.degree, std::get<real_type>(params.gamma), params.coef0);
+                            break;
+                        case kernel_function_type::rbf:
+                            detail::device_kernel_assembly_symm<kernel_function_type::rbf>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                            break;
+                        case kernel_function_type::sigmoid:
+                            detail::device_kernel_assembly_symm<kernel_function_type::sigmoid>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma), params.coef0);
+                            break;
+                        case kernel_function_type::laplacian:
+                            detail::device_kernel_assembly_symm<kernel_function_type::laplacian>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                            break;
+                        case kernel_function_type::chi_squared:
+                            detail::device_kernel_assembly_symm<kernel_function_type::chi_squared>(alpha, q_red, matr_A, QA_cost, cost, B, beta, C, std::get<real_type>(params.gamma));
+                            break;
+                    }
                 }
-            }
-            break;
-    }
+                break;
+        }
     });
     // wait until operation is completed
     wait.get();
@@ -223,39 +223,39 @@ aos_matrix<real_type> csvm::predict_values(const parameter &params,
 
     // num_predict_points x num_classes
     aos_matrix<real_type> out{ plssvm::shape{ num_predict_points, num_classes }, real_type{ 0.0 }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
-    
-    ::hpx::future<void> wait = ::hpx::async([&](){
-    if (params.kernel_type == kernel_function_type::linear) {
-        // special optimization for the linear kernel function
-        if (w.empty()) {
-            // fill w vector
-            w = soa_matrix<real_type>{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
-            detail::device_kernel_w_linear(w, alpha, support_vectors);
+
+    ::hpx::future<void> wait = ::hpx::async([&]() {
+        if (params.kernel_type == kernel_function_type::linear) {
+            // special optimization for the linear kernel function
+            if (w.empty()) {
+                // fill w vector
+                w = soa_matrix<real_type>{ plssvm::shape{ num_classes, num_features }, plssvm::shape{ PADDING_SIZE, PADDING_SIZE } };
+                detail::device_kernel_w_linear(w, alpha, support_vectors);
+            }
         }
-    }
 
-    // call the predict kernels
-    switch (params.kernel_type) {
-        case kernel_function_type::linear:
-            // predict the values using the w vector
-            detail::device_kernel_predict_linear(out, w, rho, predict_points);
-            break;
-        case kernel_function_type::polynomial:
-            detail::device_kernel_predict<kernel_function_type::polynomial>(out, alpha, rho, support_vectors, predict_points, params.degree, std::get<real_type>(params.gamma), params.coef0);
-            break;
-        case kernel_function_type::rbf:
-            detail::device_kernel_predict<kernel_function_type::rbf>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
-            break;
-        case kernel_function_type::sigmoid:
-            detail::device_kernel_predict<kernel_function_type::sigmoid>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma), params.coef0);
-            break;
-        case kernel_function_type::laplacian:
-            detail::device_kernel_predict<kernel_function_type::laplacian>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
-            break;
-        case kernel_function_type::chi_squared:
-            detail::device_kernel_predict<kernel_function_type::chi_squared>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
-            break;
-    }
+        // call the predict kernels
+        switch (params.kernel_type) {
+            case kernel_function_type::linear:
+                // predict the values using the w vector
+                detail::device_kernel_predict_linear(out, w, rho, predict_points);
+                break;
+            case kernel_function_type::polynomial:
+                detail::device_kernel_predict<kernel_function_type::polynomial>(out, alpha, rho, support_vectors, predict_points, params.degree, std::get<real_type>(params.gamma), params.coef0);
+                break;
+            case kernel_function_type::rbf:
+                detail::device_kernel_predict<kernel_function_type::rbf>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+                break;
+            case kernel_function_type::sigmoid:
+                detail::device_kernel_predict<kernel_function_type::sigmoid>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma), params.coef0);
+                break;
+            case kernel_function_type::laplacian:
+                detail::device_kernel_predict<kernel_function_type::laplacian>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+                break;
+            case kernel_function_type::chi_squared:
+                detail::device_kernel_predict<kernel_function_type::chi_squared>(out, alpha, rho, support_vectors, predict_points, std::get<real_type>(params.gamma));
+                break;
+        }
     });
     // wait until operation is completed
     wait.get();
diff --git a/src/plssvm/backends/HPX/detail/utility.cpp b/src/plssvm/backends/HPX/detail/utility.cpp
index c36422661..c71c43507 100644
--- a/src/plssvm/backends/HPX/detail/utility.cpp
+++ b/src/plssvm/backends/HPX/detail/utility.cpp
@@ -7,11 +7,11 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include <hpx/runtime_distributed.hpp>       // ::hpx::get_num_worker_threads
-#include <hpx/version.hpp>                   // ::hpx::full_version_as_string
 #include "plssvm/backends/HPX/detail/utility.hpp"
 
-#include <string>  // std::string
+#include <hpx/runtime_distributed.hpp>  // ::hpx::get_num_worker_threads
+#include <hpx/version.hpp>              // ::hpx::full_version_as_string
+#include <string>                       // std::string
 
 namespace plssvm::hpx::detail {
 
diff --git a/tests/backends/HPX/hpx_csvm.cpp b/tests/backends/HPX/hpx_csvm.cpp
index 6797f2544..c1fd7df34 100644
--- a/tests/backends/HPX/hpx_csvm.cpp
+++ b/tests/backends/HPX/hpx_csvm.cpp
@@ -9,31 +9,31 @@
  * @brief Tests for the functionality related to the HPX backend.
  */
 
-#include "plssvm/backend_types.hpp"                                                   // plssvm::csvm_to_backend_type_v
+#include "plssvm/backend_types.hpp"                                                // plssvm::csvm_to_backend_type_v
 #include "plssvm/backends/HPX/csvm.hpp"                                            // plssvm::hpx::csvm
 #include "plssvm/backends/HPX/exceptions.hpp"                                      // plssvm::hpx::backend_exception
 #include "plssvm/backends/HPX/kernel/cg_explicit/blas.hpp"                         // plssvm::hpx::device_kernel_symm
 #include "plssvm/backends/HPX/kernel/cg_explicit/kernel_matrix_assembly.hpp"       // plssvm::hpx::device_kernel_assembly
 #include "plssvm/backends/HPX/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp"  // plssvm::hpx::device_kernel_assembly_symm
 #include "plssvm/backends/HPX/kernel/predict_kernel.hpp"                           // plssvm::hpx::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict}
-#include "plssvm/constants.hpp"                                                       // plssvm::PADDING_SIZE
-#include "plssvm/data_set.hpp"                                                        // plssvm::data_set
-#include "plssvm/detail/arithmetic_type_name.hpp"                                     // plssvm::detail::arithmetic_type_name
-#include "plssvm/detail/data_distribution.hpp"                                        // plssvm::detail::triangular_data_distribution
-#include "plssvm/detail/type_list.hpp"                                                // plssvm::detail::supported_label_types
-#include "plssvm/kernel_function_types.hpp"                                           // plssvm::kernel_function_type
-#include "plssvm/matrix.hpp"                                                          // plssvm::soa_matrix
-#include "plssvm/parameter.hpp"                                                       // plssvm::parameter, plssvm::detail::parameter, plssvm::kernel_type, plssvm::cost
-#include "plssvm/shape.hpp"                                                           // plssvm::shape
-#include "plssvm/target_platforms.hpp"                                                // plssvm::target_platform
-
-#include "tests/backends/generic_csvm_tests.hpp"       // generic CSVM tests to instantiate
-#include "tests/backends/ground_truth.hpp"             // ground_truth::{perform_dimensional_reduction, assemble_device_specific_kernel_matrix, assemble_full_kernel_matrix, gemm, calculate_w}
-#include "tests/backends/HPX/mock_hpx_csvm.hpp"  // mock_hpx_csvm
-#include "tests/custom_test_macros.hpp"                // EXPECT_THROW_WHAT
-#include "tests/naming.hpp"                            // naming::test_parameter_to_name
-#include "tests/types_to_test.hpp"                     // util::{cartesian_type_product_t, combine_test_parameters_gtest_t}
-#include "tests/utility.hpp"                           // util::redirect_output
+#include "plssvm/constants.hpp"                                                    // plssvm::PADDING_SIZE
+#include "plssvm/data_set.hpp"                                                     // plssvm::data_set
+#include "plssvm/detail/arithmetic_type_name.hpp"                                  // plssvm::detail::arithmetic_type_name
+#include "plssvm/detail/data_distribution.hpp"                                     // plssvm::detail::triangular_data_distribution
+#include "plssvm/detail/type_list.hpp"                                             // plssvm::detail::supported_label_types
+#include "plssvm/kernel_function_types.hpp"                                        // plssvm::kernel_function_type
+#include "plssvm/matrix.hpp"                                                       // plssvm::soa_matrix
+#include "plssvm/parameter.hpp"                                                    // plssvm::parameter, plssvm::detail::parameter, plssvm::kernel_type, plssvm::cost
+#include "plssvm/shape.hpp"                                                        // plssvm::shape
+#include "plssvm/target_platforms.hpp"                                             // plssvm::target_platform
+
+#include "tests/backends/generic_csvm_tests.hpp"  // generic CSVM tests to instantiate
+#include "tests/backends/ground_truth.hpp"        // ground_truth::{perform_dimensional_reduction, assemble_device_specific_kernel_matrix, assemble_full_kernel_matrix, gemm, calculate_w}
+#include "tests/backends/HPX/mock_hpx_csvm.hpp"   // mock_hpx_csvm
+#include "tests/custom_test_macros.hpp"           // EXPECT_THROW_WHAT
+#include "tests/naming.hpp"                       // naming::test_parameter_to_name
+#include "tests/types_to_test.hpp"                // util::{cartesian_type_product_t, combine_test_parameters_gtest_t}
+#include "tests/utility.hpp"                      // util::redirect_output
 
 #include "fmt/format.h"   // fmt::format
 #include "gtest/gtest.h"  // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test
@@ -44,7 +44,7 @@
 #include <vector>     // std::vector
 
 class HPXCSVM : public ::testing::Test,
-                   private util::redirect_output<> { };
+                private util::redirect_output<> { };
 
 // check whether the constructor correctly fails when using an incompatible target platform
 TEST_F(HPXCSVM, construct_parameter) {

From 30d9d6466424987fb2d31f39a90ebdd025f8390d Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 19 Nov 2024 11:22:22 +0100
Subject: [PATCH 40/48] Adjust tests/main.cpp to new environment.hpp

---
 tests/main.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/main.cpp b/tests/main.cpp
index 0623c7a26..1fc7b9aab 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -45,9 +45,7 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest);
 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception);
 
 void ensure_finalization() {
-    if (!plssvm::environment::is_finalized()) {
-        plssvm::environment::finalize();
-    }
+    plssvm::environment::finalize();
 }
 
 int main(int argc, char **argv) {
@@ -55,6 +53,7 @@ int main(int argc, char **argv) {
 
     // initialize environments
     const plssvm::environment::scope_guard environment_guard{};
+    // Note: necessary for Kokkos::SYCL
     [[maybe_unused]] const int ret = std::atexit(ensure_finalization);
 
     // prevent problems with fork() in the presence of multiple threads

From d2968258ab7b6968a8e2bfaf2b2b4becb55d9878 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 19 Nov 2024 11:24:42 +0100
Subject: [PATCH 41/48] Add HPX combination types in tests

---
 tests/backend_types.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/backend_types.cpp b/tests/backend_types.cpp
index fb9d3a565..4b0f27aae 100644
--- a/tests/backend_types.cpp
+++ b/tests/backend_types.cpp
@@ -109,6 +109,7 @@ TEST_P(BackendTypeUnsupportedCombination, unsupported_backend_target_platform_co
 INSTANTIATE_TEST_SUITE_P(BackendType, BackendTypeUnsupportedCombination, ::testing::Values(
          unsupported_combination_type{ { plssvm::backend_type::cuda, plssvm::backend_type::hip }, { plssvm::target_platform::cpu } },
          unsupported_combination_type{ { plssvm::backend_type::openmp }, { plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel } },
+         unsupported_combination_type{ { plssvm::backend_type::hpx }, { plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel } },
          unsupported_combination_type{ { plssvm::backend_type::cuda }, { plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel } },
          unsupported_combination_type{ { plssvm::backend_type::hip }, { plssvm::target_platform::gpu_intel } }),
          naming::pretty_print_unsupported_backend_combination<BackendTypeUnsupportedCombination>);
@@ -126,6 +127,7 @@ TEST_P(BackendTypeSupportedCombination, supported_backend_target_platform_combin
 // clang-format off
 INSTANTIATE_TEST_SUITE_P(BackendType, BackendTypeSupportedCombination, ::testing::Values(
          supported_combination_type{ { plssvm::backend_type::openmp }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::openmp },
+         supported_combination_type{ { plssvm::backend_type::hpx }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::hpx },
          supported_combination_type{ { plssvm::backend_type::stdpar }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::stdpar },
          supported_combination_type{ { plssvm::backend_type::cuda }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::cuda },
          supported_combination_type{ { plssvm::backend_type::hip }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::hip },
@@ -141,8 +143,8 @@ INSTANTIATE_TEST_SUITE_P(BackendType, BackendTypeSupportedCombination, ::testing
 TEST(BackendType, csvm_to_backend_type) {
     // test the type_trait
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::openmp::csvm>::value, plssvm::backend_type::openmp);
-    EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::stdpar::csvm>::value, plssvm::backend_type::stdpar);
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::hpx::csvm>::value, plssvm::backend_type::hpx);
+    EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::stdpar::csvm>::value, plssvm::backend_type::stdpar);
     EXPECT_EQ(plssvm::csvm_to_backend_type<const plssvm::cuda::csvm>::value, plssvm::backend_type::cuda);
     EXPECT_EQ(plssvm::csvm_to_backend_type<plssvm::hip::csvm &>::value, plssvm::backend_type::hip);
     EXPECT_EQ(plssvm::csvm_to_backend_type<const plssvm::opencl::csvm &>::value, plssvm::backend_type::opencl);
@@ -157,8 +159,8 @@ TEST(BackendType, csvm_to_backend_type) {
 TEST(BackendType, csvm_to_backend_type_v) {
     // test the type_trait
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::openmp::csvm>, plssvm::backend_type::openmp);
-    EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::stdpar::csvm>, plssvm::backend_type::stdpar);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::hpx::csvm>, plssvm::backend_type::hpx);
+    EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::stdpar::csvm>, plssvm::backend_type::stdpar);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<const plssvm::cuda::csvm>, plssvm::backend_type::cuda);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<plssvm::hip::csvm &>, plssvm::backend_type::hip);
     EXPECT_EQ(plssvm::csvm_to_backend_type_v<const plssvm::opencl::csvm &>, plssvm::backend_type::opencl);

From 545e0fbca4eede822461bb81200d255b0b4dd2f2 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 19 Nov 2024 11:27:40 +0100
Subject: [PATCH 42/48] HPX runtime workaround for tests

---
 src/plssvm/backends/HPX/CMakeLists.txt | 2 +-
 tests/main.cpp                         | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index 8ebde5e46..066e364ea 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -40,7 +40,7 @@ set(PLSSVM_HPX_SOURCES
 # set target properties
 set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
 add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
-target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx)
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main)
 
 # additional compilation flags
 target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
diff --git a/tests/main.cpp b/tests/main.cpp
index 1fc7b9aab..76a00808c 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -15,6 +15,12 @@
 
 #include <cstdlib>  // std::atexit
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
+    // Workaround as Scope Guard not working properly with Google Test
+    // Run the entire main function in HPX rutime
+    #include <hpx/hpx_main.hpp>
+#endif
+
 // silence GTest warnings/test errors
 
 // generic CSVM tests

From e74f80faa490a3a741a91fe4effd97d232614631 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Mon, 25 Nov 2024 15:20:16 +0100
Subject: [PATCH 43/48] Separate main for HPX tests

---
 src/plssvm/backends/HPX/CMakeLists.txt |  2 +-
 tests/backends/HPX/CMakeLists.txt      | 29 +++++++++++-
 tests/hpx_main.cpp                     | 61 ++++++++++++++++++++++++++
 tests/main.cpp                         | 13 +++---
 4 files changed, 95 insertions(+), 10 deletions(-)
 create mode 100644 tests/hpx_main.cpp

diff --git a/src/plssvm/backends/HPX/CMakeLists.txt b/src/plssvm/backends/HPX/CMakeLists.txt
index 066e364ea..8ebde5e46 100644
--- a/src/plssvm/backends/HPX/CMakeLists.txt
+++ b/src/plssvm/backends/HPX/CMakeLists.txt
@@ -40,7 +40,7 @@ set(PLSSVM_HPX_SOURCES
 # set target properties
 set_local_and_parent(PLSSVM_HPX_BACKEND_LIBRARY_NAME plssvm-HPX)
 add_library(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_HPX_SOURCES})
-target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx HPX::wrap_main)
+target_link_libraries(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PUBLIC HPX::hpx)
 
 # additional compilation flags
 target_compile_options(${PLSSVM_HPX_BACKEND_LIBRARY_NAME} PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang>:-Wconversion>)
diff --git a/tests/backends/HPX/CMakeLists.txt b/tests/backends/HPX/CMakeLists.txt
index d24f26e83..25b114622 100644
--- a/tests/backends/HPX/CMakeLists.txt
+++ b/tests/backends/HPX/CMakeLists.txt
@@ -14,11 +14,36 @@ set(PLSSVM_HPX_TEST_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/hpx_csvm.cpp
 )
 
+# check if HPX can be enabled
+message(CHECK_START "Checking for HPX backend")
+
+find_package(HPX 1.9.0)
+
+if (NOT HPX_FOUND)
+    message(CHECK_FAIL "not found")
+    if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+        message(SEND_ERROR "Cannot find requested backend: HPX!")
+    endif ()
+    return()
+else ()
+    if (NOT DEFINED PLSSVM_CPU_TARGET_ARCHS)
+        if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
+            message(SEND_ERROR "Found requested HPX backend, but no \"cpu\" targets were specified!")
+        else ()
+            message(STATUS "Found HPX backend, but no \"cpu\" targets were specified!")
+        endif ()
+        message(CHECK_FAIL "skipped")
+        return()
+    endif ()
+endif ()
+message(CHECK_PASS "found ")
+
+
 # add test executable
-add_executable(${PLSSVM_HPX_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_HPX_TEST_SOURCES})
+add_executable(${PLSSVM_HPX_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../hpx_main.cpp ${PLSSVM_HPX_TEST_SOURCES})
 
 # link against test library
-target_link_libraries(${PLSSVM_HPX_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME})
+target_link_libraries(${PLSSVM_HPX_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME} HPX::hpx HPX::wrap_main)
 
 # add tests to google test
 include(GoogleTest)
diff --git a/tests/hpx_main.cpp b/tests/hpx_main.cpp
new file mode 100644
index 000000000..7398db9d0
--- /dev/null
+++ b/tests/hpx_main.cpp
@@ -0,0 +1,61 @@
+/**
+ * @file
+ * @author Alexander Van Craen
+ * @author Marcel Breyer
+ * @copyright 2018-today The PLSSVM project - All Rights Reserved
+ * @license This file is part of the PLSSVM project which is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast".
+ */
+
+#include "plssvm/environment.hpp"  // plssvm::environment::scope_guard
+
+#include "gtest/gtest.h"  // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG},GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions
+
+#include <cstdlib>  // std::atexit
+
+// Workaround as HPX runtime not working properly with Google Test
+// Run the entire main function in HPX runtime
+#include <hpx/hpx_main.hpp>
+
+// silence GTest warnings/test errors
+
+// generic CSVM tests
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest);
+// generic GPU CSVM tests
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest);
+// pinned memory tests
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemory);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayout);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryDeathTest);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayoutDeathTest);
+// device pointer tests
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest);
+// exception tests
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception);
+
+int main(int argc, char **argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+
+    // prevent problems with fork() in the presence of multiple threads
+    // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads
+    // NOTE: may reduce performance of the (death) tests
+#if !defined(_WIN32)
+    ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
+#endif
+    return RUN_ALL_TESTS();
+}
diff --git a/tests/main.cpp b/tests/main.cpp
index 76a00808c..aef1550aa 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -9,18 +9,12 @@
  * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast".
  */
 
-#include "plssvm/environment.hpp"  // plssvm::environment::scope_guard
+#include "plssvm/environment.hpp"  // plssvm::environment::{scope_guard, initialize, finalize}
 
 #include "gtest/gtest.h"  // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG},GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions
 
 #include <cstdlib>  // std::atexit
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    // Workaround as Scope Guard not working properly with Google Test
-    // Run the entire main function in HPX rutime
-    #include <hpx/hpx_main.hpp>
-#endif
-
 // silence GTest warnings/test errors
 
 // generic CSVM tests
@@ -57,10 +51,15 @@ void ensure_finalization() {
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
 
+#if defined(PLSSVM_HAS_HPX_BACKEND)
     // initialize environments
+    plssvm::environment::initialize();
+#else
+    // initialize environments and manage lifetime with Scope Guard
     const plssvm::environment::scope_guard environment_guard{};
     // Note: necessary for Kokkos::SYCL
     [[maybe_unused]] const int ret = std::atexit(ensure_finalization);
+#endif
 
     // prevent problems with fork() in the presence of multiple threads
     // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads

From db447662cabf1eaf7102fb4ee34009c558ff1dd2 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 26 Nov 2024 18:48:29 +0100
Subject: [PATCH 44/48] Remove unused header

---
 tests/hpx_main.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/hpx_main.cpp b/tests/hpx_main.cpp
index 7398db9d0..dd8fdb984 100644
--- a/tests/hpx_main.cpp
+++ b/tests/hpx_main.cpp
@@ -2,6 +2,7 @@
  * @file
  * @author Alexander Van Craen
  * @author Marcel Breyer
+ * @author Alexander Strack
  * @copyright 2018-today The PLSSVM project - All Rights Reserved
  * @license This file is part of the PLSSVM project which is released under the MIT license.
  *          See the LICENSE.md file in the project root for full license information.
@@ -9,8 +10,6 @@
  * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast".
  */
 
-#include "plssvm/environment.hpp"  // plssvm::environment::scope_guard
-
 #include "gtest/gtest.h"  // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG},GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions
 
 #include <cstdlib>  // std::atexit

From 192a9bd8e047e1225ed8a26b2978da17ebac4ee0 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 26 Nov 2024 18:49:59 +0100
Subject: [PATCH 45/48] Adjust accuracy for failing tests

---
 tests/backends/HPX/hpx_csvm.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/backends/HPX/hpx_csvm.cpp b/tests/backends/HPX/hpx_csvm.cpp
index c1fd7df34..1f5fd9f48 100644
--- a/tests/backends/HPX/hpx_csvm.cpp
+++ b/tests/backends/HPX/hpx_csvm.cpp
@@ -192,7 +192,7 @@ TEST_F(HPXCSVM, calculate_w) {
     const plssvm::soa_matrix<plssvm::real_type> correct_w = ground_truth::calculate_w(weights, data.data());
 
     // check C for correctness
-    EXPECT_FLOATING_POINT_MATRIX_NEAR(w, correct_w);
+    EXPECT_FLOATING_POINT_MATRIX_NEAR_EPS(w, correct_w, 1e6);
 }
 
 using kernel_function_type_list_gtest = util::combine_test_parameters_gtest_t<util::kernel_function_type_list>;
@@ -350,7 +350,7 @@ TYPED_TEST(HPXCSVMKernelFunction, predict_values) {
 
     // check out for correctness
     const plssvm::aos_matrix<plssvm::real_type> correct_out = ground_truth::predict_values(params, correct_w, weights, rho, data_matr, predict_points);
-    EXPECT_FLOATING_POINT_MATRIX_NEAR(out, correct_out);
+    EXPECT_FLOATING_POINT_MATRIX_NEAR_EPS(out, correct_out, 1e6);
 }
 
 //*************************************************************************************************************************************//

From 7ac367dafd1dac6f999bc042fe1eaa7ff0c8bf42 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 26 Nov 2024 18:52:12 +0100
Subject: [PATCH 46/48] Remove unnecessary HPX lib check

---
 tests/backends/HPX/CMakeLists.txt | 27 ++-------------------------
 1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/tests/backends/HPX/CMakeLists.txt b/tests/backends/HPX/CMakeLists.txt
index 25b114622..0498844f9 100644
--- a/tests/backends/HPX/CMakeLists.txt
+++ b/tests/backends/HPX/CMakeLists.txt
@@ -4,6 +4,8 @@
 ##          See the LICENSE.md file in the project root for full license information.
 ########################################################################################################################
 
+find_package(HPX 1.9.0)
+
 ## create HPX tests
 set(PLSSVM_HPX_TEST_NAME HPX_tests)
 
@@ -14,31 +16,6 @@ set(PLSSVM_HPX_TEST_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/hpx_csvm.cpp
 )
 
-# check if HPX can be enabled
-message(CHECK_START "Checking for HPX backend")
-
-find_package(HPX 1.9.0)
-
-if (NOT HPX_FOUND)
-    message(CHECK_FAIL "not found")
-    if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
-        message(SEND_ERROR "Cannot find requested backend: HPX!")
-    endif ()
-    return()
-else ()
-    if (NOT DEFINED PLSSVM_CPU_TARGET_ARCHS)
-        if (PLSSVM_ENABLE_HPX_BACKEND MATCHES "ON")
-            message(SEND_ERROR "Found requested HPX backend, but no \"cpu\" targets were specified!")
-        else ()
-            message(STATUS "Found HPX backend, but no \"cpu\" targets were specified!")
-        endif ()
-        message(CHECK_FAIL "skipped")
-        return()
-    endif ()
-endif ()
-message(CHECK_PASS "found ")
-
-
 # add test executable
 add_executable(${PLSSVM_HPX_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../hpx_main.cpp ${PLSSVM_HPX_TEST_SOURCES})
 

From e738e2a2c755c9bc70a66a61cc949129872e3457 Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Tue, 26 Nov 2024 18:52:58 +0100
Subject: [PATCH 47/48] Fix HPX scope guard

---
 include/plssvm/environment.hpp | 1 +
 tests/main.cpp                 | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/plssvm/environment.hpp b/include/plssvm/environment.hpp
index ee23fb2fd..69a6dab24 100644
--- a/include/plssvm/environment.hpp
+++ b/include/plssvm/environment.hpp
@@ -228,6 +228,7 @@ inline void finalize_backend([[maybe_unused]] const backend_type backend) {
 #if defined(PLSSVM_HAS_HPX_BACKEND)
     if (backend == backend_type::hpx) {
         ::hpx::post([] { ::hpx::finalize(); });
+        ::hpx::stop();
     }
 #endif
 }
diff --git a/tests/main.cpp b/tests/main.cpp
index aef1550aa..247303148 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -51,15 +51,10 @@ void ensure_finalization() {
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
 
-#if defined(PLSSVM_HAS_HPX_BACKEND)
-    // initialize environments
-    plssvm::environment::initialize();
-#else
     // initialize environments and manage lifetime with Scope Guard
     const plssvm::environment::scope_guard environment_guard{};
     // Note: necessary for Kokkos::SYCL
     [[maybe_unused]] const int ret = std::atexit(ensure_finalization);
-#endif
 
     // prevent problems with fork() in the presence of multiple threads
     // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads

From c5c7dab03b879b002b6379be57319658c066a14b Mon Sep 17 00:00:00 2001
From: constracktor <strack.as@icloud.com>
Date: Wed, 27 Nov 2024 08:41:31 +0100
Subject: [PATCH 48/48] Make HPX package required

---
 tests/backends/HPX/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/backends/HPX/CMakeLists.txt b/tests/backends/HPX/CMakeLists.txt
index 0498844f9..194c132fe 100644
--- a/tests/backends/HPX/CMakeLists.txt
+++ b/tests/backends/HPX/CMakeLists.txt
@@ -4,7 +4,7 @@
 ##          See the LICENSE.md file in the project root for full license information.
 ########################################################################################################################
 
-find_package(HPX 1.9.0)
+find_package(HPX 1.9.0 REQUIRED)
 
 ## create HPX tests
 set(PLSSVM_HPX_TEST_NAME HPX_tests)