diff --git a/ci/run_ctests.sh b/ci/run_ctests.sh
index 26f8aae5b..9258f6402 100755
--- a/ci/run_ctests.sh
+++ b/ci/run_ctests.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 set -euo pipefail
@@ -26,3 +26,7 @@ for gt in "${GTEST_DIR}"/*_TEST; do
     echo "Running gtest ${test_name}"
     "${gt}" "$@"
 done
+
+# Re-run C_API_TEST with CPU memory for local solves (no time limit tests); filter is quoted to prevent globbing
+echo "Running gtest C_API_TEST with CUOPT_USE_CPU_MEM_FOR_LOCAL"
+CUOPT_USE_CPU_MEM_FOR_LOCAL=1 "${GTEST_DIR}/C_API_TEST" --gtest_filter='-c_api/TimeLimitTestFixture.*' "$@"
diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp
index 5023cefc6..c05b37091 100644
--- a/cpp/cuopt_cli.cpp
+++ b/cpp/cuopt_cli.cpp
@@ -1,12 +1,14 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
 
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <mps_parser/parser.hpp>
 #include <utilities/logger.hpp>
@@ -89,7 +91,6 @@ int run_single_file(const std::string& file_path,
                     bool solve_relaxation,
                     const std::map<std::string, std::string>& settings_strings)
 {
-  const raft::handle_t handle_{};
   cuopt::linear_programming::solver_settings_t<int, double> settings;
 
   try {
@@ -122,13 +123,31 @@ int run_single_file(const std::string& file_path,
     return -1;
   }
 
-  auto op_problem =
-    cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model);
+  // Determine memory backend and create problem using interface
+  // Create handle only for GPU memory backend (avoid CUDA init on CPU-only hosts)
+  auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
+  std::unique_ptr<raft::handle_t> handle_ptr;
+  std::unique_ptr<cuopt::linear_programming::optimization_problem_interface_t<int, double>>
+    problem_interface;
+
+  if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
+    handle_ptr = std::make_unique<raft::handle_t>();
+    problem_interface =
+      std::make_unique<cuopt::linear_programming::gpu_optimization_problem_t<int, double>>(
+        handle_ptr.get());
+  } else {
+    problem_interface =
+      std::make_unique<cuopt::linear_programming::cpu_optimization_problem_t<int, double>>(nullptr);
+  }
+
+  // Populate the problem from MPS data model
+  cuopt::linear_programming::populate_from_mps_data_model(problem_interface.get(), mps_data_model);
 
-  const bool is_mip =
-    (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP ||
-     op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) &&
-    !solve_relaxation;
+  const bool is_mip = (problem_interface->get_problem_category() ==
+                         cuopt::linear_programming::problem_category_t::MIP ||
+                       problem_interface->get_problem_category() ==
+                         cuopt::linear_programming::problem_category_t::IP) &&
+                      !solve_relaxation;
 
   try {
     auto initial_solution =
@@ -157,10 +176,10 @@ int run_single_file(const std::string& file_path,
   try {
     if (is_mip) {
       auto& mip_settings = settings.get_mip_settings();
-      auto solution      = cuopt::linear_programming::solve_mip(op_problem, mip_settings);
+      auto solution = cuopt::linear_programming::solve_mip(problem_interface.get(), mip_settings);
     } else {
       auto& lp_settings = settings.get_pdlp_settings();
-      auto solution     = cuopt::linear_programming::solve_lp(op_problem, lp_settings);
+      auto solution     = cuopt::linear_programming::solve_lp(problem_interface.get(), lp_settings);
     }
   } catch (const std::exception& e) {
     CUOPT_LOG_ERROR("Error: %s", e.what());
@@ -334,19 +353,23 @@ int main(int argc, char* argv[])
   const auto initial_solution_file = program.get<std::string>("--initial-solution");
   const auto solve_relaxation      = program.get<bool>("--relaxation");
 
-  // All arguments are parsed as string, default values are parsed as int if unused.
-  const auto num_gpus = program.is_used("--num-gpus")
-                          ? std::stoi(program.get<std::string>("--num-gpus"))
-                          : program.get<int>("--num-gpus");
-
+  // Only initialize CUDA resources if using GPU memory backend (not remote execution)
+  auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
   std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;
 
-  for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
-    cudaSetDevice(i);
-    memory_resources.push_back(make_async());
-    rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
+  if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
+    // All arguments are parsed as string, default values are parsed as int if unused.
+    const auto num_gpus = program.is_used("--num-gpus")
+                            ? std::stoi(program.get<std::string>("--num-gpus"))
+                            : program.get<int>("--num-gpus");
+
+    for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
+      cudaSetDevice(i);
+      memory_resources.push_back(make_async());
+      rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
+    }
+    cudaSetDevice(0);
   }
-  cudaSetDevice(0);
 
   return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings);
 }
diff --git a/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp
new file mode 100644
index 000000000..d61e09ca5
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp
@@ -0,0 +1,568 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/mip/solver_solution.hpp>
+#include <cuopt/linear_programming/mip/solver_stats.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
+#include <cuopt/linear_programming/pdlp/solver_solution.hpp>
+#include <cuopt/linear_programming/utilities/cython_types.hpp>
+
+#include <raft/core/copy.hpp>
+
+#include <limits>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace cuopt::linear_programming {
+
+/**
+ * @brief CPU-backed LP solution (uses std::vector instead of rmm::device_uvector)
+ *
+ * This class stores solution data in host memory (std::vector).
+ * Used for remote execution where GPU memory is not available.
+ */
+template <typename i_t, typename f_t>
+class cpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
+ public:
+  // Bring base class overloads into scope to avoid hiding warnings
+  using lp_solution_interface_t<i_t, f_t>::get_objective_value;
+  using lp_solution_interface_t<i_t, f_t>::get_dual_objective_value;
+
+  /**
+   * @brief Construct an empty CPU LP solution (for errors)
+   */
+  cpu_lp_solution_t(pdlp_termination_status_t termination_status, cuopt::logic_error error_status)
+    : termination_status_(termination_status),
+      error_status_(error_status),
+      solve_time_(0.0),
+      primal_objective_(std::numeric_limits<f_t>::signaling_NaN()),
+      dual_objective_(std::numeric_limits<f_t>::signaling_NaN())
+  {
+  }
+
+  /**
+   * @brief Construct CPU LP solution with data (stub version: residuals/gap/iterations zeroed)
+   */
+  cpu_lp_solution_t(std::vector<f_t>&& primal_solution,
+                    std::vector<f_t>&& dual_solution,
+                    std::vector<f_t>&& reduced_cost,
+                    pdlp_termination_status_t termination_status,
+                    f_t primal_objective,
+                    f_t dual_objective,
+                    double solve_time)
+    : primal_solution_(std::move(primal_solution)),
+      dual_solution_(std::move(dual_solution)),
+      reduced_cost_(std::move(reduced_cost)),
+      termination_status_(termination_status),
+      error_status_("", cuopt::error_type_t::Success),
+      solve_time_(solve_time),
+      primal_objective_(primal_objective),
+      dual_objective_(dual_objective),
+      l2_primal_residual_(0.0),
+      l2_dual_residual_(0.0),
+      gap_(0.0),
+      num_iterations_(0),
+      solved_by_pdlp_(false)
+  {
+  }
+
+  /**
+   * @brief Construct CPU LP solution with complete termination info
+   * Used for real remote execution when we have all the data
+   */
+  cpu_lp_solution_t(std::vector<f_t>&& primal_solution,
+                    std::vector<f_t>&& dual_solution,
+                    std::vector<f_t>&& reduced_cost,
+                    pdlp_termination_status_t termination_status,
+                    f_t primal_objective,
+                    f_t dual_objective,
+                    double solve_time,
+                    f_t l2_primal_residual,
+                    f_t l2_dual_residual,
+                    f_t gap,
+                    i_t num_iterations,
+                    bool solved_by_pdlp)
+    : primal_solution_(std::move(primal_solution)),
+      dual_solution_(std::move(dual_solution)),
+      reduced_cost_(std::move(reduced_cost)),
+      termination_status_(termination_status),
+      error_status_("", cuopt::error_type_t::Success),
+      solve_time_(solve_time),
+      primal_objective_(primal_objective),
+      dual_objective_(dual_objective),
+      l2_primal_residual_(l2_primal_residual),
+      l2_dual_residual_(l2_dual_residual),
+      gap_(gap),
+      num_iterations_(num_iterations),
+      solved_by_pdlp_(solved_by_pdlp)
+  {
+  }
+
+  /**
+   * @brief Construct CPU LP solution with complete data including warm start
+   * Used for real remote execution with warm start support
+   */
+  cpu_lp_solution_t(std::vector<f_t>&& primal_solution,
+                    std::vector<f_t>&& dual_solution,
+                    std::vector<f_t>&& reduced_cost,
+                    pdlp_termination_status_t termination_status,
+                    f_t primal_objective,
+                    f_t dual_objective,
+                    double solve_time,
+                    f_t l2_primal_residual,
+                    f_t l2_dual_residual,
+                    f_t gap,
+                    i_t num_iterations,
+                    bool solved_by_pdlp,
+                    std::vector<f_t>&& current_primal_solution_ws,
+                    std::vector<f_t>&& current_dual_solution_ws,
+                    std::vector<f_t>&& initial_primal_average_ws,
+                    std::vector<f_t>&& initial_dual_average_ws,
+                    std::vector<f_t>&& current_ATY_ws,
+                    std::vector<f_t>&& sum_primal_solutions_ws,
+                    std::vector<f_t>&& sum_dual_solutions_ws,
+                    std::vector<f_t>&& last_restart_duality_gap_primal_solution_ws,
+                    std::vector<f_t>&& last_restart_duality_gap_dual_solution_ws,
+                    f_t initial_primal_weight,
+                    f_t initial_step_size,
+                    i_t total_pdlp_iterations,
+                    i_t total_pdhg_iterations,
+                    f_t last_candidate_kkt_score,
+                    f_t last_restart_kkt_score,
+                    f_t sum_solution_weight,
+                    i_t iterations_since_last_restart)
+    : primal_solution_(std::move(primal_solution)),
+      dual_solution_(std::move(dual_solution)),
+      reduced_cost_(std::move(reduced_cost)),
+      termination_status_(termination_status),
+      error_status_("", cuopt::error_type_t::Success),
+      solve_time_(solve_time),
+      primal_objective_(primal_objective),
+      dual_objective_(dual_objective),
+      l2_primal_residual_(l2_primal_residual),
+      l2_dual_residual_(l2_dual_residual),
+      gap_(gap),
+      num_iterations_(num_iterations),
+      solved_by_pdlp_(solved_by_pdlp)
+  {
+    // Initialize warmstart data
+    pdlp_warm_start_data_.current_primal_solution_ = std::move(current_primal_solution_ws);
+    pdlp_warm_start_data_.current_dual_solution_   = std::move(current_dual_solution_ws);
+    pdlp_warm_start_data_.initial_primal_average_  = std::move(initial_primal_average_ws);
+    pdlp_warm_start_data_.initial_dual_average_    = std::move(initial_dual_average_ws);
+    pdlp_warm_start_data_.current_ATY_             = std::move(current_ATY_ws);
+    pdlp_warm_start_data_.sum_primal_solutions_    = std::move(sum_primal_solutions_ws);
+    pdlp_warm_start_data_.sum_dual_solutions_      = std::move(sum_dual_solutions_ws);
+    pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_ =
+      std::move(last_restart_duality_gap_primal_solution_ws);
+    pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_ =
+      std::move(last_restart_duality_gap_dual_solution_ws);
+    pdlp_warm_start_data_.initial_primal_weight_         = initial_primal_weight;
+    pdlp_warm_start_data_.initial_step_size_             = initial_step_size;
+    pdlp_warm_start_data_.total_pdlp_iterations_         = total_pdlp_iterations;
+    pdlp_warm_start_data_.total_pdhg_iterations_         = total_pdhg_iterations;
+    pdlp_warm_start_data_.last_candidate_kkt_score_      = last_candidate_kkt_score;
+    pdlp_warm_start_data_.last_restart_kkt_score_        = last_restart_kkt_score;
+    pdlp_warm_start_data_.sum_solution_weight_           = sum_solution_weight;
+    pdlp_warm_start_data_.iterations_since_last_restart_ = iterations_since_last_restart;
+  }
+
+  /**
+   * @brief Construct CPU LP solution with warmstart data struct (cleaner version)
+   * Used for remote execution with warmstart support
+   */
+  cpu_lp_solution_t(std::vector<f_t>&& primal_solution,
+                    std::vector<f_t>&& dual_solution,
+                    std::vector<f_t>&& reduced_cost,
+                    pdlp_termination_status_t termination_status,
+                    f_t primal_objective,
+                    f_t dual_objective,
+                    double solve_time,
+                    f_t l2_primal_residual,
+                    f_t l2_dual_residual,
+                    f_t gap,
+                    i_t num_iterations,
+                    bool solved_by_pdlp,
+                    cpu_pdlp_warm_start_data_t<i_t, f_t>&& warmstart_data)
+    : primal_solution_(std::move(primal_solution)),
+      dual_solution_(std::move(dual_solution)),
+      reduced_cost_(std::move(reduced_cost)),
+      termination_status_(termination_status),
+      error_status_("", cuopt::error_type_t::Success),
+      solve_time_(solve_time),
+      primal_objective_(primal_objective),
+      dual_objective_(dual_objective),
+      l2_primal_residual_(l2_primal_residual),
+      l2_dual_residual_(l2_dual_residual),
+      gap_(gap),
+      num_iterations_(num_iterations),
+      solved_by_pdlp_(solved_by_pdlp),
+      pdlp_warm_start_data_(std::move(warmstart_data))
+  {
+  }
+
+  // Host memory accessors (interface implementations)
+  const std::vector<f_t>& get_primal_solution_host() const override { return primal_solution_; }
+  const std::vector<f_t>& get_dual_solution_host() const override { return dual_solution_; }
+  const std::vector<f_t>& get_reduced_cost_host() const override { return reduced_cost_; }
+
+  // Interface implementations
+  cuopt::logic_error get_error_status() const override { return error_status_; }
+
+  f_t get_solve_time() const override { return solve_time_; }
+
+  i_t get_primal_solution_size() const override { return primal_solution_.size(); }
+
+  i_t get_dual_solution_size() const override { return dual_solution_.size(); }
+
+  i_t get_reduced_cost_size() const override { return reduced_cost_.size(); }
+
+  f_t get_objective_value(i_t = 0) const override { return primal_objective_; }
+
+  f_t get_dual_objective_value(i_t = 0) const override { return dual_objective_; }
+
+  pdlp_termination_status_t get_termination_status(i_t = 0) const override
+  {
+    return termination_status_;
+  }
+
+  f_t get_l2_primal_residual(i_t = 0) const override { return l2_primal_residual_; }
+
+  f_t get_l2_dual_residual(i_t = 0) const override { return l2_dual_residual_; }
+
+  f_t get_gap(i_t = 0) const override { return gap_; }
+
+  i_t get_num_iterations(i_t = 0) const override { return num_iterations_; }
+
+  bool is_solved_by_pdlp(i_t = 0) const override { return solved_by_pdlp_; }
+
+  const pdlp_warm_start_data_t<i_t, f_t>& get_pdlp_warm_start_data() const override
+  {
+    throw cuopt::logic_error(
+      "PDLP warm start data not available for CPU solutions (use individual accessors)",
+      cuopt::error_type_t::RuntimeError);
+  }
+
+  bool has_warm_start_data() const override { return pdlp_warm_start_data_.is_populated(); }
+
+  // Warmstart data accessor - returns the CPU warmstart struct
+  const cpu_pdlp_warm_start_data_t<i_t, f_t>& get_cpu_pdlp_warm_start_data() const
+  {
+    return pdlp_warm_start_data_;
+  }
+
+  cpu_pdlp_warm_start_data_t<i_t, f_t>& get_cpu_pdlp_warm_start_data()
+  {
+    return pdlp_warm_start_data_;
+  }
+
+  // Individual warm start data accessors (each returns a copy of the stored host data)
+  std::vector<f_t> get_current_primal_solution_host() const override
+  {
+    return pdlp_warm_start_data_.current_primal_solution_;
+  }
+  std::vector<f_t> get_current_dual_solution_host() const override
+  {
+    return pdlp_warm_start_data_.current_dual_solution_;
+  }
+  std::vector<f_t> get_initial_primal_average_host() const override
+  {
+    return pdlp_warm_start_data_.initial_primal_average_;
+  }
+  std::vector<f_t> get_initial_dual_average_host() const override
+  {
+    return pdlp_warm_start_data_.initial_dual_average_;
+  }
+  std::vector<f_t> get_current_ATY_host() const override
+  {
+    return pdlp_warm_start_data_.current_ATY_;
+  }
+  std::vector<f_t> get_sum_primal_solutions_host() const override
+  {
+    return pdlp_warm_start_data_.sum_primal_solutions_;
+  }
+  std::vector<f_t> get_sum_dual_solutions_host() const override
+  {
+    return pdlp_warm_start_data_.sum_dual_solutions_;
+  }
+  std::vector<f_t> get_last_restart_duality_gap_primal_solution_host() const override
+  {
+    return pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_;
+  }
+  std::vector<f_t> get_last_restart_duality_gap_dual_solution_host() const override
+  {
+    return pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_;
+  }
+  f_t get_initial_primal_weight() const override
+  {
+    return pdlp_warm_start_data_.initial_primal_weight_;
+  }
+  f_t get_initial_step_size() const override { return pdlp_warm_start_data_.initial_step_size_; }
+  i_t get_total_pdlp_iterations() const override
+  {
+    return pdlp_warm_start_data_.total_pdlp_iterations_;
+  }
+  i_t get_total_pdhg_iterations() const override
+  {
+    return pdlp_warm_start_data_.total_pdhg_iterations_;
+  }
+  f_t get_last_candidate_kkt_score() const override
+  {
+    return pdlp_warm_start_data_.last_candidate_kkt_score_;
+  }
+  f_t get_last_restart_kkt_score() const override
+  {
+    return pdlp_warm_start_data_.last_restart_kkt_score_;
+  }
+  f_t get_sum_solution_weight() const override
+  {
+    return pdlp_warm_start_data_.sum_solution_weight_;
+  }
+  i_t get_iterations_since_last_restart() const override
+  {
+    return pdlp_warm_start_data_.iterations_since_last_restart_;
+  }
+
+  /**
+   * @brief Build a device-backed optimization_problem_solution_t from this host solution
+   * Copies primal/dual/reduced-cost data from std::vector to rmm::device_uvector on stream_view
+   */
+  optimization_problem_solution_t<i_t, f_t> to_gpu_solution(
+    rmm::cuda_stream_view stream_view) override
+  {
+    // Allocate device buffers sized like the host vectors; empty host vectors are not copied
+    rmm::device_uvector<f_t> primal_device(primal_solution_.size(), stream_view);
+    rmm::device_uvector<f_t> dual_device(dual_solution_.size(), stream_view);
+    rmm::device_uvector<f_t> reduced_cost_device(reduced_cost_.size(), stream_view);
+
+    if (!primal_solution_.empty()) {
+      raft::copy(
+        primal_device.data(), primal_solution_.data(), primal_solution_.size(), stream_view);
+    }
+    if (!dual_solution_.empty()) {
+      raft::copy(dual_device.data(), dual_solution_.data(), dual_solution_.size(), stream_view);
+    }
+    if (!reduced_cost_.empty()) {
+      raft::copy(
+        reduced_cost_device.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view);
+    }
+
+    // Single termination entry; only the objectives and the solve time are filled in here
+    using additional_info_t =
+      typename optimization_problem_solution_t<i_t, f_t>::additional_termination_information_t;
+    std::vector<additional_info_t> termination_stats(1);
+    termination_stats[0].primal_objective = primal_objective_;
+    termination_stats[0].dual_objective   = dual_objective_;
+    termination_stats[0].solve_time       = solve_time_;
+
+    std::vector<pdlp_termination_status_t> termination_status_vec = {termination_status_};
+
+    // NOTE(review): residuals, gap, iterations, error_status_, warm start are not forwarded
+    return optimization_problem_solution_t<i_t, f_t>(primal_device,
+                                                     dual_device,
+                                                     reduced_cost_device,
+                                                     "",  // objective_name
+                                                     {},  // var_names
+                                                     {},  // row_names
+                                                     std::move(termination_stats),
+                                                     std::move(termination_status_vec));
+  }
+
+  /**
+   * @brief Convert to CPU-backed cpu_linear_programming_ret_t struct for Python/Cython
+   * Moves std::vector data with zero-copy.
+   */
+  cuopt::cython::cpu_linear_programming_ret_t to_cpu_linear_programming_ret_t() &&;
+
+  /**
+   * @brief Polymorphic conversion to Python return type (interface override)
+   * Returns CPU variant (cpu_linear_programming_ret_t with std::vector)
+   */
+  std::variant<cuopt::cython::linear_programming_ret_t, cuopt::cython::cpu_linear_programming_ret_t>
+    to_python_lp_ret() && override
+  {
+    return std::move(*this).to_cpu_linear_programming_ret_t();
+  }
+
+ private:
+  std::vector<f_t> primal_solution_;
+  std::vector<f_t> dual_solution_;
+  std::vector<f_t> reduced_cost_;
+  pdlp_termination_status_t termination_status_;
+  cuopt::logic_error error_status_;
+  double solve_time_;
+  f_t primal_objective_;
+  f_t dual_objective_;
+  // Defaults below cover the error-only constructor, which does not set these members
+  f_t l2_primal_residual_{std::numeric_limits<f_t>::signaling_NaN()};
+  f_t l2_dual_residual_{std::numeric_limits<f_t>::signaling_NaN()};
+  f_t gap_{std::numeric_limits<f_t>::signaling_NaN()};
+  i_t num_iterations_{0};
+  bool solved_by_pdlp_{false};
+  // PDLP warm start data (embedded struct, CPU-backed using std::vector)
+  cpu_pdlp_warm_start_data_t<i_t, f_t> pdlp_warm_start_data_;
+};
+
+/**
+ * @brief CPU-backed MIP solution (uses std::vector instead of rmm::device_uvector)
+ *
+ * This class stores solution data in host memory (std::vector).
+ * Used for remote execution where GPU memory is not available.
+ */
+template <typename i_t, typename f_t>
+class cpu_mip_solution_t : public mip_solution_interface_t<i_t, f_t> {
+ public:
+  /**
+   * @brief Construct an empty CPU MIP solution (for errors)
+   */
+  cpu_mip_solution_t(mip_termination_status_t termination_status, cuopt::logic_error error_status)
+    : termination_status_(termination_status),
+      error_status_(error_status),
+      objective_(std::numeric_limits<f_t>::signaling_NaN()),
+      mip_gap_(std::numeric_limits<f_t>::signaling_NaN()),
+      solution_bound_(std::numeric_limits<f_t>::signaling_NaN()),
+      total_solve_time_(0.0),
+      presolve_time_(0.0),
+      max_constraint_violation_(std::numeric_limits<f_t>::signaling_NaN()),
+      max_int_violation_(std::numeric_limits<f_t>::signaling_NaN()),
+      max_variable_bound_violation_(std::numeric_limits<f_t>::signaling_NaN()),
+      num_nodes_(0),
+      num_simplex_iterations_(0)
+  {
+  }
+
+  /**
+   * @brief Construct CPU MIP solution with data
+   */
+  cpu_mip_solution_t(std::vector<f_t>&& solution,
+                     mip_termination_status_t termination_status,
+                     f_t objective,
+                     f_t mip_gap,
+                     f_t solution_bound,
+                     double total_solve_time,
+                     double presolve_time,
+                     f_t max_constraint_violation,
+                     f_t max_int_violation,
+                     f_t max_variable_bound_violation,
+                     i_t num_nodes,
+                     i_t num_simplex_iterations)
+    : solution_(std::move(solution)),
+      termination_status_(termination_status),
+      error_status_("", cuopt::error_type_t::Success),
+      objective_(objective),
+      mip_gap_(mip_gap),
+      solution_bound_(solution_bound),
+      total_solve_time_(total_solve_time),
+      presolve_time_(presolve_time),
+      max_constraint_violation_(max_constraint_violation),
+      max_int_violation_(max_int_violation),
+      max_variable_bound_violation_(max_variable_bound_violation),
+      num_nodes_(num_nodes),
+      num_simplex_iterations_(num_simplex_iterations)
+  {
+  }
+
+  // Host memory accessor (interface implementation)
+  const std::vector<f_t>& get_solution_host() const override { return solution_; }
+
+  // Interface implementations
+  cuopt::logic_error get_error_status() const override { return error_status_; }
+
+  f_t get_solve_time() const override { return total_solve_time_; }
+
+  i_t get_solution_size() const override { return solution_.size(); }
+
+  f_t get_objective_value() const override { return objective_; }
+
+  f_t get_mip_gap() const override { return mip_gap_; }
+
+  f_t get_solution_bound() const override { return solution_bound_; }
+
+  mip_termination_status_t get_termination_status() const override { return termination_status_; }
+
+  f_t get_presolve_time() const override { return presolve_time_; }
+
+  f_t get_max_constraint_violation() const override { return max_constraint_violation_; }
+
+  f_t get_max_int_violation() const override { return max_int_violation_; }
+
+  f_t get_max_variable_bound_violation() const override { return max_variable_bound_violation_; }
+
+  i_t get_num_nodes() const override { return num_nodes_; }
+
+  i_t get_num_simplex_iterations() const override { return num_simplex_iterations_; }
+
+  /**
+   * @brief Build a device-backed mip_solution_t from this host solution
+   * Copies the solution from std::vector to rmm::device_uvector on stream_view
+   */
+  mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view stream_view) override
+  {
+    // Allocate a device buffer sized like the host solution; an empty solution is not copied
+    rmm::device_uvector<f_t> solution_device(solution_.size(), stream_view);
+
+    if (!solution_.empty()) {
+      raft::copy(solution_device.data(), solution_.data(), solution_.size(), stream_view);
+    }
+
+    // Carry over the timing, bound, node and simplex-iteration counters stored on this object
+    solver_stats_t<i_t, f_t> stats;
+    stats.total_solve_time       = total_solve_time_;
+    stats.presolve_time          = presolve_time_;
+    stats.solution_bound         = solution_bound_;
+    stats.num_nodes              = num_nodes_;
+    stats.num_simplex_iterations = num_simplex_iterations_;
+
+    // Variable names are passed empty; NOTE(review): error_status_ is not forwarded here
+    return mip_solution_t<i_t, f_t>(std::move(solution_device),
+                                    {},  // var_names
+                                    objective_,
+                                    mip_gap_,
+                                    termination_status_,
+                                    max_constraint_violation_,
+                                    max_int_violation_,
+                                    max_variable_bound_violation_,
+                                    stats);
+  }
+
+  /**
+   * @brief Convert to CPU-backed cpu_mip_ret_t struct for Python/Cython
+   * Moves std::vector data with zero-copy.
+   */
+  cuopt::cython::cpu_mip_ret_t to_cpu_mip_ret_t() &&;
+
+  /**
+   * @brief Polymorphic conversion to Python return type (interface override)
+   * Returns CPU variant (cpu_mip_ret_t with std::vector)
+   */
+  std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t> to_python_mip_ret() &&
+    override
+  {
+    return std::move(*this).to_cpu_mip_ret_t();
+  }
+
+ private:
+  std::vector<f_t> solution_;
+  mip_termination_status_t termination_status_;
+  cuopt::logic_error error_status_;
+  f_t objective_;
+  f_t mip_gap_;
+  f_t solution_bound_;
+  double total_solve_time_;
+  double presolve_time_;
+  f_t max_constraint_violation_;
+  f_t max_int_violation_;
+  f_t max_variable_bound_violation_;
+  i_t num_nodes_;
+  i_t num_simplex_iterations_;
+};
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp b/cpp/include/cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp
new file mode 100644
index 000000000..61d420976
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp
@@ -0,0 +1,116 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp>
+#include <vector>
+
+namespace cuopt::linear_programming {
+
+/**
+ * @brief Host-memory counterpart of pdlp_warm_start_data_t, used for remote execution.
+ *
+ * Every vector that the warm start carries is stored as a std::vector, and the scalar
+ * state defaults to -1 until it is overwritten.
+ *
+ * @tparam i_t Integer type used for the iteration counters
+ * @tparam f_t Floating point type used for the vectors and the scalar state
+ */
+template <typename i_t, typename f_t>
+struct cpu_pdlp_warm_start_data_t {
+  // Vector state (host copies)
+  std::vector<f_t> current_primal_solution_;
+  std::vector<f_t> current_dual_solution_;
+  std::vector<f_t> initial_primal_average_;
+  std::vector<f_t> initial_dual_average_;
+  std::vector<f_t> current_ATY_;
+  std::vector<f_t> sum_primal_solutions_;
+  std::vector<f_t> sum_dual_solutions_;
+  std::vector<f_t> last_restart_duality_gap_primal_solution_;
+  std::vector<f_t> last_restart_duality_gap_dual_solution_;
+
+  // Scalar state; -1 is the value each field holds when default-constructed
+  f_t initial_primal_weight_{-1};
+  f_t initial_step_size_{-1};
+  i_t total_pdlp_iterations_{-1};
+  i_t total_pdhg_iterations_{-1};
+  f_t last_candidate_kkt_score_{-1};
+  f_t last_restart_kkt_score_{-1};
+  f_t sum_solution_weight_{-1};
+  i_t iterations_since_last_restart_{-1};
+
+  /**
+   * @brief Create an empty warm start: all vectors empty, all scalars at -1.
+   */
+  cpu_pdlp_warm_start_data_t() = default;
+
+  /**
+   * @brief Build a host copy from a warm start view.
+   *
+   * The scalar fields are taken over as they are. Each span of the view is copied into
+   * the matching vector, except that a span whose data() is nullptr is skipped, which
+   * leaves that vector empty.
+   *
+   * @param[in] view Warm start view to copy from
+   */
+  cpu_pdlp_warm_start_data_t(const pdlp_warm_start_data_view_t<i_t, f_t>& view)
+    : initial_primal_weight_(view.initial_primal_weight_),
+      initial_step_size_(view.initial_step_size_),
+      total_pdlp_iterations_(view.total_pdlp_iterations_),
+      total_pdhg_iterations_(view.total_pdhg_iterations_),
+      last_candidate_kkt_score_(view.last_candidate_kkt_score_),
+      last_restart_kkt_score_(view.last_restart_kkt_score_),
+      sum_solution_weight_(view.sum_solution_weight_),
+      iterations_since_last_restart_(view.iterations_since_last_restart_)
+  {
+    // One place for the null check and the pointer-range copy shared by all nine spans
+    const auto fill_from = [](std::vector<f_t>& target, const auto& source) {
+      const auto first = source.data();
+      if (first == nullptr) { return; }
+      target.assign(first, first + source.size());
+    };
+
+    fill_from(current_primal_solution_, view.current_primal_solution_);
+    fill_from(current_dual_solution_, view.current_dual_solution_);
+    fill_from(initial_primal_average_, view.initial_primal_average_);
+    fill_from(initial_dual_average_, view.initial_dual_average_);
+    fill_from(current_ATY_, view.current_ATY_);
+    fill_from(sum_primal_solutions_, view.sum_primal_solutions_);
+    fill_from(sum_dual_solutions_, view.sum_dual_solutions_);
+    fill_from(last_restart_duality_gap_primal_solution_,
+              view.last_restart_duality_gap_primal_solution_);
+    fill_from(last_restart_duality_gap_dual_solution_,
+              view.last_restart_duality_gap_dual_solution_);
+  }
+
+  /**
+   * @brief Report whether warm start data is present.
+   *
+   * Uses last_restart_duality_gap_dual_solution_ as the sentinel (same check as
+   * release/26.02).
+   *
+   * @return true when last_restart_duality_gap_dual_solution_ is not empty
+   */
+  bool is_populated() const { return !last_restart_duality_gap_dual_solution_.empty(); }
+};
+
+// Forward declare GPU type for conversion functions
+template <typename i_t, typename f_t>
+struct pdlp_warm_start_data_t;
+
+// Convert GPU → CPU warmstart (D2H copy). Declaration only. NOTE(review): no direct rmm include.
+template <typename i_t, typename f_t>
+cpu_pdlp_warm_start_data_t<i_t, f_t> convert_to_cpu_warmstart(
+  const pdlp_warm_start_data_t<i_t, f_t>& gpu_data, rmm::cuda_stream_view stream);
+
+// Convert CPU → GPU warmstart (H2D copy). Declaration only. NOTE(review): no direct rmm include.
+template <typename i_t, typename f_t>
+pdlp_warm_start_data_t<i_t, f_t> convert_to_gpu_warmstart(
+  const cpu_pdlp_warm_start_data_t<i_t, f_t>& cpu_data, rmm::cuda_stream_view stream);
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp b/cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp
new file mode 100644
index 000000000..3871dc672
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/gpu_optimization_problem_solution.hpp
@@ -0,0 +1,518 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/mip/solver_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
+#include <cuopt/linear_programming/pdlp/solver_solution.hpp>
+#include <cuopt/linear_programming/utilities/cython_types.hpp>
+
+#include <raft/core/copy.hpp>
+#include <rmm/cuda_stream_view.hpp>
+
+#include <optional>
+
+namespace cuopt::linear_programming {
+
+/**
+ * @brief GPU-backed LP solution (wraps optimization_problem_solution_t)
+ *
+ * This class wraps the existing optimization_problem_solution_t which uses GPU memory.
+ * It implements the interface to allow polymorphism with CPU solutions.
+ *
+ * Host access works in two ways:
+ * - The primal, dual and reduced-cost getters copy from the device on first use and keep
+ *   the result in a mutable cache, so later calls return the same host vector.
+ * - The warm start vector getters return a fresh host copy on every call.
+ * Every device-to-host copy is issued on rmm::cuda_stream_per_thread, which is synchronized
+ * before the data is handed back.
+ *
+ * @tparam i_t Integer type for indices
+ * @tparam f_t Floating point type for values
+ */
+template <typename i_t, typename f_t>
+class gpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
+ public:
+  // Bring base class overloads into scope to avoid hiding warnings
+  using lp_solution_interface_t<i_t, f_t>::get_objective_value;
+  using lp_solution_interface_t<i_t, f_t>::get_dual_objective_value;
+
+  /**
+   * @brief Construct from existing optimization_problem_solution_t (move)
+   * @param[in] solution GPU solution whose contents are moved into this wrapper
+   */
+  explicit gpu_lp_solution_t(optimization_problem_solution_t<i_t, f_t>&& solution)
+    : solution_(std::move(solution))
+  {
+  }
+
+  /**
+   * @brief Get the underlying GPU solution
+   * @return Reference to the wrapped optimization_problem_solution_t
+   */
+  optimization_problem_solution_t<i_t, f_t>& get_solution() { return solution_; }
+  const optimization_problem_solution_t<i_t, f_t>& get_solution() const { return solution_; }
+
+  // ---------------------------------------------------------------------------
+  // Interface implementations: status, timing and sizes
+  // ---------------------------------------------------------------------------
+
+  /** @brief Error status reported by the wrapped solution */
+  cuopt::logic_error get_error_status() const override { return solution_.get_error_status(); }
+
+  /** @brief Solve time of the wrapped solution, converted to f_t */
+  f_t get_solve_time() const override { return static_cast<f_t>(solution_.get_solve_time()); }
+
+  /** @brief Number of entries in the device primal solution */
+  i_t get_primal_solution_size() const override { return solution_.get_primal_solution().size(); }
+
+  /** @brief Number of entries in the device dual solution */
+  i_t get_dual_solution_size() const override { return solution_.get_dual_solution().size(); }
+
+  /** @brief Number of entries in the device reduced cost vector */
+  i_t get_reduced_cost_size() const override
+  {
+    return mutable_solution().get_reduced_cost().size();
+  }
+
+  // ---------------------------------------------------------------------------
+  // Cached host copies of the solution vectors
+  // ---------------------------------------------------------------------------
+
+  /**
+   * @brief Host copy of the primal solution (copied from the device on first call, then cached)
+   *
+   * The cache is only filled once the copy has completed, so a copy that throws leaves it
+   * empty and the next call tries again.
+   */
+  const std::vector<f_t>& get_primal_solution_host() const override
+  {
+    if (!primal_solution_host_cache_) {
+      primal_solution_host_cache_ = copy_to_host(solution_.get_primal_solution());
+    }
+    return *primal_solution_host_cache_;
+  }
+
+  /**
+   * @brief Host copy of the dual solution (copied from the device on first call, then cached)
+   *
+   * The cache is only filled once the copy has completed, so a copy that throws leaves it
+   * empty and the next call tries again.
+   */
+  const std::vector<f_t>& get_dual_solution_host() const override
+  {
+    if (!dual_solution_host_cache_) {
+      dual_solution_host_cache_ = copy_to_host(solution_.get_dual_solution());
+    }
+    return *dual_solution_host_cache_;
+  }
+
+  /**
+   * @brief Host copy of the reduced costs (copied from the device on first call, then cached)
+   *
+   * The cache is only filled once the copy has completed, so a copy that throws leaves it
+   * empty and the next call tries again.
+   */
+  const std::vector<f_t>& get_reduced_cost_host() const override
+  {
+    if (!reduced_cost_host_cache_) {
+      reduced_cost_host_cache_ = copy_to_host(mutable_solution().get_reduced_cost());
+    }
+    return *reduced_cost_host_cache_;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Termination information (id is forwarded to the wrapped solution; 0 by default)
+  // ---------------------------------------------------------------------------
+
+  /** @brief Objective value reported by the wrapped solution for the given id */
+  f_t get_objective_value(i_t id = 0) const override { return solution_.get_objective_value(id); }
+
+  /** @brief Dual objective value reported by the wrapped solution for the given id */
+  f_t get_dual_objective_value(i_t id = 0) const override
+  {
+    return solution_.get_dual_objective_value(id);
+  }
+
+  /** @brief Termination status reported by the wrapped solution for the given id */
+  pdlp_termination_status_t get_termination_status(i_t id = 0) const override
+  {
+    return solution_.get_termination_status(id);
+  }
+
+  /** @brief l2_primal_residual from the additional termination information for the given id */
+  f_t get_l2_primal_residual(i_t id = 0) const override
+  {
+    return solution_.get_additional_termination_information(id).l2_primal_residual;
+  }
+
+  /** @brief l2_dual_residual from the additional termination information for the given id */
+  f_t get_l2_dual_residual(i_t id = 0) const override
+  {
+    return solution_.get_additional_termination_information(id).l2_dual_residual;
+  }
+
+  /** @brief gap from the additional termination information for the given id */
+  f_t get_gap(i_t id = 0) const override
+  {
+    return solution_.get_additional_termination_information(id).gap;
+  }
+
+  /** @brief number_of_steps_taken from the additional termination information for the given id */
+  i_t get_num_iterations(i_t id = 0) const override
+  {
+    return solution_.get_additional_termination_information(id).number_of_steps_taken;
+  }
+
+  /** @brief solved_by_pdlp from the additional termination information for the given id */
+  bool is_solved_by_pdlp(i_t id = 0) const override
+  {
+    return solution_.get_additional_termination_information(id).solved_by_pdlp;
+  }
+
+  // ---------------------------------------------------------------------------
+  // PDLP warm start data
+  // ---------------------------------------------------------------------------
+
+  /** @brief Warm start data held by the wrapped solution */
+  const pdlp_warm_start_data_t<i_t, f_t>& get_pdlp_warm_start_data() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data();
+  }
+
+  /** @brief true when the warm start's current_primal_solution_ is not empty */
+  bool has_warm_start_data() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().current_primal_solution_.size() > 0;
+  }
+
+  // Individual warm start data accessors (each call copies from device to host)
+
+  /** @brief Host copy of current_primal_solution_; empty when it has no elements */
+  std::vector<f_t> get_current_primal_solution_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.current_primal_solution_);
+  }
+
+  /** @brief Host copy of current_dual_solution_; empty when it has no elements */
+  std::vector<f_t> get_current_dual_solution_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.current_dual_solution_);
+  }
+
+  /** @brief Host copy of initial_primal_average_; empty when it has no elements */
+  std::vector<f_t> get_initial_primal_average_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.initial_primal_average_);
+  }
+
+  /** @brief Host copy of initial_dual_average_; empty when it has no elements */
+  std::vector<f_t> get_initial_dual_average_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.initial_dual_average_);
+  }
+
+  /** @brief Host copy of current_ATY_; empty when it has no elements */
+  std::vector<f_t> get_current_ATY_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.current_ATY_);
+  }
+
+  /** @brief Host copy of sum_primal_solutions_; empty when it has no elements */
+  std::vector<f_t> get_sum_primal_solutions_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.sum_primal_solutions_);
+  }
+
+  /** @brief Host copy of sum_dual_solutions_; empty when it has no elements */
+  std::vector<f_t> get_sum_dual_solutions_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.sum_dual_solutions_);
+  }
+
+  /** @brief Host copy of last_restart_duality_gap_primal_solution_; empty if it has none */
+  std::vector<f_t> get_last_restart_duality_gap_primal_solution_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    return copy_to_host(ws.last_restart_duality_gap_primal_solution_);
+  }
+
+  /**
+   * @brief Host copy of last_restart_duality_gap_dual_solution_
+   * @return Empty vector when the warm start reports !is_populated(), the host copy otherwise
+   */
+  std::vector<f_t> get_last_restart_duality_gap_dual_solution_host() const override
+  {
+    auto& ws = mutable_solution().get_pdlp_warm_start_data();
+    if (!ws.is_populated()) return {};
+    return copy_to_host(ws.last_restart_duality_gap_dual_solution_);
+  }
+
+  // Scalar warm start fields, read straight from the wrapped solution's warm start data
+  f_t get_initial_primal_weight() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().initial_primal_weight_;
+  }
+  f_t get_initial_step_size() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().initial_step_size_;
+  }
+  i_t get_total_pdlp_iterations() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().total_pdlp_iterations_;
+  }
+  i_t get_total_pdhg_iterations() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().total_pdhg_iterations_;
+  }
+  f_t get_last_candidate_kkt_score() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().last_candidate_kkt_score_;
+  }
+  f_t get_last_restart_kkt_score() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().last_restart_kkt_score_;
+  }
+  f_t get_sum_solution_weight() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().sum_solution_weight_;
+  }
+  i_t get_iterations_since_last_restart() const override
+  {
+    return mutable_solution().get_pdlp_warm_start_data().iterations_since_last_restart_;
+  }
+
+  /**
+   * @brief Hand the wrapped GPU solution to the caller
+   *
+   * The data already lives on the GPU, so the stream argument is unused and the solution is
+   * simply moved out. This wrapper holds a moved-from solution afterwards; host vectors that
+   * were cached earlier are left as they are.
+   */
+  optimization_problem_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view) override
+  {
+    return std::move(solution_);
+  }
+
+  /**
+   * @brief Convert GPU solution to CPU solution
+   * Copies data from device to host for test mode or CPU-only environments.
+   * @return A new cpu_lp_solution_t with all data copied to host vectors
+   */
+  std::unique_ptr<cpu_lp_solution_t<i_t, f_t>> to_cpu_solution() const
+  {
+    // The host getters return references to the cached vectors; each one is copied exactly
+    // once, directly into the temporary that is forwarded to the cpu_lp_solution_t constructor.
+    return std::make_unique<cpu_lp_solution_t<i_t, f_t>>(
+      std::vector<f_t>(get_primal_solution_host()),
+      std::vector<f_t>(get_dual_solution_host()),
+      std::vector<f_t>(get_reduced_cost_host()),
+      get_termination_status(),
+      get_objective_value(),
+      get_dual_objective_value(),
+      get_solve_time(),
+      get_l2_primal_residual(),
+      get_l2_dual_residual(),
+      get_gap(),
+      get_num_iterations(),
+      is_solved_by_pdlp());
+  }
+
+  /**
+   * @brief Convert to GPU-backed linear_programming_ret_t struct for Python/Cython
+   * Moves device_uvector data into device_buffer wrappers with zero-copy.
+   */
+  cuopt::cython::linear_programming_ret_t to_linear_programming_ret_t() &&;
+
+  /**
+   * @brief Polymorphic conversion to Python return type (interface override)
+   * Returns GPU variant (linear_programming_ret_t with device_buffer)
+   */
+  std::variant<cuopt::cython::linear_programming_ret_t, cuopt::cython::cpu_linear_programming_ret_t>
+    to_python_lp_ret() && override
+  {
+    return std::move(*this).to_linear_programming_ret_t();
+  }
+
+ private:
+  /**
+   * @brief Non-const access to the wrapped solution from const member functions
+   *
+   * get_reduced_cost() and get_pdlp_warm_start_data() are called through a non-const reference
+   * here (presumably because the wrapped class offers no const overloads - TODO confirm).
+   * Callers in this class only read through the returned reference.
+   */
+  optimization_problem_solution_t<i_t, f_t>& mutable_solution() const
+  {
+    return const_cast<optimization_problem_solution_t<i_t, f_t>&>(solution_);
+  }
+
+  /**
+   * @brief Copy a device vector into a newly allocated host vector
+   *
+   * The copy is issued on rmm::cuda_stream_per_thread and the stream is synchronized before
+   * returning, so the result is ready to read. The data is staged in a local vector, which means
+   * an exception thrown by the copy or the synchronization never exposes a half-filled buffer.
+   *
+   * @param[in] device_data Device vector providing data() and size()
+   * @return Host copy of device_data; empty when device_data has no elements
+   */
+  template <typename device_vector_t>
+  static std::vector<f_t> copy_to_host(const device_vector_t& device_data)
+  {
+    if (device_data.size() == 0) { return {}; }
+    std::vector<f_t> host_data(device_data.size());
+    auto stream = rmm::cuda_stream_per_thread;
+    raft::copy(host_data.data(), device_data.data(), device_data.size(), stream);
+    stream.synchronize();
+    return host_data;
+  }
+
+  optimization_problem_solution_t<i_t, f_t> solution_;
+  // Cached host data (lazy initialization)
+  mutable std::optional<std::vector<f_t>> primal_solution_host_cache_;
+  mutable std::optional<std::vector<f_t>> dual_solution_host_cache_;
+  mutable std::optional<std::vector<f_t>> reduced_cost_host_cache_;
+};
+
+/**
+ * @brief GPU-backed MIP solution (wraps mip_solution_t)
+ *
+ * This class wraps the existing mip_solution_t which uses GPU memory.
+ * It implements the interface to allow polymorphism with CPU solutions.
+ */
+template <typename i_t, typename f_t>
+class gpu_mip_solution_t : public mip_solution_interface_t<i_t, f_t> {
+ public:
+  /**
+   * @brief Construct from existing mip_solution_t (move); the argument's contents are moved in
+   */
+  explicit gpu_mip_solution_t(mip_solution_t<i_t, f_t>&& solution) : solution_(std::move(solution))
+  {
+  }
+
+  /**
+   * @brief Get the underlying GPU solution (reference to the wrapped mip_solution_t)
+   */
+  mip_solution_t<i_t, f_t>& get_solution() { return solution_; }
+  const mip_solution_t<i_t, f_t>& get_solution() const { return solution_; }
+
+  // Interface implementations (the getters forward to the wrapped mip_solution_t)
+  cuopt::logic_error get_error_status() const override { return solution_.get_error_status(); }
+
+  f_t get_solve_time() const override { return static_cast<f_t>(solution_.get_total_solve_time()); }
+
+  i_t get_solution_size() const override { return solution_.get_solution().size(); }
+
+  const std::vector<f_t>& get_solution_host() const override
+  {
+    if (!solution_host_cache_) {
+      // Device-to-host copy on the per-thread stream. The data is staged in a local vector and
+      // committed to the cache only after the copy has completed, so a copy that throws leaves
+      // the cache empty (and the next call retries) instead of caching an unfilled vector.
+      const auto& device_solution = solution_.get_solution();
+      std::vector<f_t> host_solution(device_solution.size());
+      auto stream = rmm::cuda_stream_per_thread;
+      raft::copy(host_solution.data(), device_solution.data(), device_solution.size(), stream);
+      stream.synchronize();
+      solution_host_cache_ = std::move(host_solution);
+    }
+    return *solution_host_cache_;
+  }
+
+  f_t get_objective_value() const override { return solution_.get_objective_value(); }
+
+  f_t get_mip_gap() const override { return solution_.get_mip_gap(); }
+
+  f_t get_solution_bound() const override { return solution_.get_solution_bound(); }
+
+  mip_termination_status_t get_termination_status() const override
+  {
+    return solution_.get_termination_status();
+  }
+
+  f_t get_presolve_time() const override { return solution_.get_presolve_time(); }
+
+  f_t get_max_constraint_violation() const override
+  {
+    return solution_.get_max_constraint_violation();
+  }
+
+  f_t get_max_int_violation() const override { return solution_.get_max_int_violation(); }
+
+  f_t get_max_variable_bound_violation() const override
+  {
+    return solution_.get_max_variable_bound_violation();
+  }
+
+  i_t get_num_nodes() const override { return solution_.get_num_nodes(); }
+
+  i_t get_num_simplex_iterations() const override { return solution_.get_num_simplex_iterations(); }
+
+  mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view) override
+  {
+    // Already GPU, just move (the stream is unused; a cached host vector is not cleared)
+    return std::move(solution_);
+  }
+
+  /**
+   * @brief Convert GPU MIP solution to CPU MIP solution
+   * Copies data from device to host for test mode or CPU-only environments.
+   * @return A new cpu_mip_solution_t with all data copied to host vectors
+   */
+  std::unique_ptr<cpu_mip_solution_t<i_t, f_t>> to_cpu_solution() const
+  {
+    // get_solution_host() hands back a reference to the cached host vector (filling the cache
+    // on first use). Copy-construct from that reference so the data is duplicated exactly once,
+    // straight into the temporary that is forwarded to the cpu_mip_solution_t constructor,
+    // instead of copying into a local first and then copying that local again.
+    return std::make_unique<cpu_mip_solution_t<i_t, f_t>>(
+      std::vector<f_t>(get_solution_host()),
+      get_termination_status(),
+      get_objective_value(),
+      get_mip_gap(),
+      get_solution_bound(),
+      get_solve_time(),
+      get_presolve_time(),
+      get_max_constraint_violation(),
+      get_max_int_violation(),
+      get_max_variable_bound_violation(),
+      get_num_nodes(),
+      get_num_simplex_iterations());
+  }
+
+  /**
+   * @brief Convert to GPU-backed mip_ret_t struct for Python/Cython
+   * Moves device_uvector data into device_buffer wrappers with zero-copy.
+   */
+  cuopt::cython::mip_ret_t to_mip_ret_t() &&;
+
+  /**
+   * @brief Polymorphic conversion to Python return type (interface override)
+   * Returns GPU variant (mip_ret_t with device_buffer)
+   */
+  std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t> to_python_mip_ret() &&
+    override
+  {
+    return std::move(*this).to_mip_ret_t();
+  }
+
+ private:
+  mip_solution_t<i_t, f_t> solution_;
+  // Cached host data (lazy initialization)
+  mutable std::optional<std::vector<f_t>> solution_host_cache_;
+};
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp
index d0731f3aa..eb100e61d 100644
--- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp
+++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp
@@ -350,6 +350,69 @@ class optimization_problem_t {
    */
   void set_row_names(const std::vector<std::string>& row_names);
 
+  // ============================================================================
+  // Move-based setters: take device vectors by rvalue reference (zero-copy, transfers ownership)
+  // ============================================================================
+
+  /**
+   * @brief Move constraint matrix data without copying (transfers ownership).
+   * @note This is a zero-copy operation that just moves device pointers.
+   * @param[in] A_values rvalue reference to constraint matrix values (pass with std::move)
+   * @param[in] A_indices rvalue reference to constraint matrix column indices (pass with std::move)
+   * @param[in] A_offsets rvalue reference to constraint matrix row offsets (pass with std::move)
+   */
+  void set_csr_constraint_matrix_move(rmm::device_uvector<f_t>&& A_values,
+                                      rmm::device_uvector<i_t>&& A_indices,
+                                      rmm::device_uvector<i_t>&& A_offsets);
+
+  /**
+   * @brief Move constraint bounds without copying (transfers ownership).
+   * @param[in] b rvalue reference to constraint bounds vector (pass with std::move)
+   */
+  void set_constraint_bounds_move(rmm::device_uvector<f_t>&& b);
+
+  /**
+   * @brief Move objective coefficients without copying (transfers ownership).
+   * @param[in] c rvalue reference to objective coefficients vector (pass with std::move)
+   */
+  void set_objective_coefficients_move(rmm::device_uvector<f_t>&& c);
+
+  /**
+   * @brief Move variable lower bounds without copying (transfers ownership).
+   * @param[in] variable_lower_bounds rvalue reference to lower bounds vector (pass with std::move)
+   */
+  void set_variable_lower_bounds_move(rmm::device_uvector<f_t>&& variable_lower_bounds);
+
+  /**
+   * @brief Move variable upper bounds without copying (transfers ownership).
+   * @param[in] variable_upper_bounds rvalue reference to upper bounds vector (pass with std::move)
+   */
+  void set_variable_upper_bounds_move(rmm::device_uvector<f_t>&& variable_upper_bounds);
+
+  /**
+   * @brief Move variable types without copying (transfers ownership).
+   * @param[in] variable_types rvalue reference to variable types vector (pass with std::move)
+   */
+  void set_variable_types_move(rmm::device_uvector<var_t>&& variable_types);
+
+  /**
+   * @brief Move constraint lower bounds without copying (transfers ownership).
+   * @param[in] constraint_lower_bounds rvalue reference to lower bounds vector (pass with std::move)
+   */
+  void set_constraint_lower_bounds_move(rmm::device_uvector<f_t>&& constraint_lower_bounds);
+
+  /**
+   * @brief Move constraint upper bounds without copying (transfers ownership).
+   * @param[in] constraint_upper_bounds rvalue reference to upper bounds vector (pass with std::move)
+   */
+  void set_constraint_upper_bounds_move(rmm::device_uvector<f_t>&& constraint_upper_bounds);
+
+  /**
+   * @brief Move row types without copying (transfers ownership).
+   * @param[in] row_types rvalue reference to row types vector (pass with std::move)
+   */
+  void set_row_types_move(rmm::device_uvector<char>&& row_types);
+
   /**
    * @brief Write the problem to an MPS formatted file
    *
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
new file mode 100644
index 000000000..04dcf2fe7
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_interface.hpp
@@ -0,0 +1,869 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/optimization_problem.hpp>
+
+#include <raft/core/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace cuopt::linear_programming {
+
+// Forward declarations
+template <typename i_t, typename f_t>
+class pdlp_solver_settings_t;
+template <typename i_t, typename f_t>
+class mip_solver_settings_t;
+template <typename i_t, typename f_t>
+class lp_solution_interface_t;
+template <typename i_t, typename f_t>
+class mip_solution_interface_t;
+
+/**
+ * @brief Abstract interface for optimization problems stored in CPU or GPU memory.
+ *
+ * @tparam i_t Integer type for indices (checked by static_assert)
+ * @tparam f_t Floating point type for values (checked by static_assert)
+ *
+ * All members except the defaulted virtual destructor are pure virtual. Array
+ * setters take raw pointers; array getters come in three forms:
+ * - Device getters returning rmm::device_uvector references (GPU memory)
+ * - `*_host()` getters returning std::vector by value (CPU memory)
+ * - `copy_*_to_host()` methods writing into a caller-supplied buffer
+ */
+template <typename i_t, typename f_t>
+class optimization_problem_interface_t {
+ public:
+  static_assert(std::is_integral<i_t>::value,
+                "'optimization_problem_interface_t' accepts only integer types for indexes");
+  static_assert(std::is_floating_point<f_t>::value,
+                "'optimization_problem_interface_t' accepts only floating point types for weights");
+
+  virtual ~optimization_problem_interface_t() = default;
+
+  // ============================================================================
+  // Setters (accept both CPU and GPU pointers)
+  // ============================================================================
+
+  /**
+   * @brief Set the sense of optimization to maximize.
+   * @param[in] maximize true means to maximize the objective function, else minimize.
+   */
+  virtual void set_maximize(bool maximize) = 0;
+
+  /**
+   * @brief Set the constraint matrix (A) in CSR format.
+   * @param[in] A_values Values of the CSR representation (device or host pointer)
+   * @param size_values Size of the A_values array
+   * @param[in] A_indices Indices of the CSR representation (device or host pointer)
+   * @param size_indices Size of the A_indices array
+   * @param[in] A_offsets Offsets of the CSR representation (device or host pointer)
+   * @param size_offsets Size of the A_offsets array
+   */
+  virtual void set_csr_constraint_matrix(const f_t* A_values,
+                                         i_t size_values,
+                                         const i_t* A_indices,
+                                         i_t size_indices,
+                                         const i_t* A_offsets,
+                                         i_t size_offsets) = 0;
+
+  /**
+   * @brief Set the constraint bounds (b / right-hand side) array.
+   * @param[in] b Device or host memory pointer
+   * @param size Size of the b array
+   */
+  virtual void set_constraint_bounds(const f_t* b, i_t size) = 0;
+
+  /**
+   * @brief Set the objective coefficients (c) array.
+   * @param[in] c Device or host memory pointer
+   * @param size Size of the c array
+   */
+  virtual void set_objective_coefficients(const f_t* c, i_t size) = 0;
+
+  /**
+   * @brief Set the scaling factor of the objective function.
+   * @param objective_scaling_factor Objective scaling factor value
+   */
+  virtual void set_objective_scaling_factor(f_t objective_scaling_factor) = 0;
+
+  /**
+   * @brief Set the offset of the objective function.
+   * @param objective_offset Objective offset value
+   */
+  virtual void set_objective_offset(f_t objective_offset) = 0;
+
+  /**
+   * @brief Set the quadratic objective matrix (Q) in CSR format.
+   * @param[in] Q_values Values of the CSR representation
+   * @param size_values Size of the Q_values array
+   * @param[in] Q_indices Indices of the CSR representation
+   * @param size_indices Size of the Q_indices array
+   * @param[in] Q_offsets Offsets of the CSR representation
+   * @param size_offsets Size of the Q_offsets array
+   * @param validate_positive_semi_definite Whether to validate that Q is positive semi-definite
+   * @note Unlike the other array setters, host vs. device pointers is unspecified here (confirm).
+   */
+  virtual void set_quadratic_objective_matrix(const f_t* Q_values,
+                                              i_t size_values,
+                                              const i_t* Q_indices,
+                                              i_t size_indices,
+                                              const i_t* Q_offsets,
+                                              i_t size_offsets,
+                                              bool validate_positive_semi_definite = false) = 0;
+
+  /**
+   * @brief Set the variables (x) lower bounds.
+   * @param[in] variable_lower_bounds Device or host memory pointer
+   * @param size Size of the variable_lower_bounds array
+   */
+  virtual void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) = 0;
+
+  /**
+   * @brief Set the variables (x) upper bounds.
+   * @param[in] variable_upper_bounds Device or host memory pointer
+   * @param size Size of the variable_upper_bounds array
+   */
+  virtual void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) = 0;
+
+  /**
+   * @brief Set the variables types.
+   * @param[in] variable_types Device or host memory pointer to a var_t array
+   * @param size Size of the variable_types array
+   */
+  virtual void set_variable_types(const var_t* variable_types, i_t size) = 0;
+
+  /**
+   * @brief Set the problem category.
+   * @param[in] category Problem category value
+   */
+  virtual void set_problem_category(const problem_category_t& category) = 0;
+
+  /**
+   * @brief Set the constraints lower bounds.
+   * @param[in] constraint_lower_bounds Device or host memory pointer
+   * @param size Size of the constraint_lower_bounds array
+   */
+  virtual void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) = 0;
+
+  /**
+   * @brief Set the constraints upper bounds.
+   * @param[in] constraint_upper_bounds Device or host memory pointer
+   * @param size Size of the constraint_upper_bounds array
+   */
+  virtual void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) = 0;
+
+  /**
+   * @brief Set the type of each row (constraint).
+   * @param[in] row_types Device or host memory pointer to a character array
+   * @param size Size of the row_types array
+   */
+  virtual void set_row_types(const char* row_types, i_t size) = 0;
+
+  /**
+   * @brief Set the name of the objective function.
+   * @param[in] objective_name Objective name value
+   */
+  virtual void set_objective_name(const std::string& objective_name) = 0;
+
+  /**
+   * @brief Set the problem name.
+   * @param[in] problem_name Problem name value
+   */
+  virtual void set_problem_name(const std::string& problem_name) = 0;
+
+  /**
+   * @brief Set the variables names.
+   * @param[in] variable_names Variable names values
+   */
+  virtual void set_variable_names(const std::vector<std::string>& variable_names) = 0;
+
+  /**
+   * @brief Set the row names.
+   * @param[in] row_names Row names value
+   */
+  virtual void set_row_names(const std::vector<std::string>& row_names) = 0;
+
+  // ============================================================================
+  // Getters - sizes, metadata, and device memory (GPU) arrays
+  // ============================================================================
+
+  virtual i_t get_n_variables() const                                           = 0;
+  virtual i_t get_n_constraints() const                                         = 0;
+  virtual i_t get_nnz() const                                                   = 0;
+  virtual i_t get_n_integers() const                                            = 0;
+  virtual const rmm::device_uvector<f_t>& get_constraint_matrix_values() const  = 0;
+  virtual rmm::device_uvector<f_t>& get_constraint_matrix_values()              = 0;
+  virtual const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const = 0;
+  virtual rmm::device_uvector<i_t>& get_constraint_matrix_indices()             = 0;
+  virtual const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const = 0;
+  virtual rmm::device_uvector<i_t>& get_constraint_matrix_offsets()             = 0;
+  virtual const rmm::device_uvector<f_t>& get_constraint_bounds() const         = 0;
+  virtual rmm::device_uvector<f_t>& get_constraint_bounds()                     = 0;
+  virtual const rmm::device_uvector<f_t>& get_objective_coefficients() const    = 0;
+  virtual rmm::device_uvector<f_t>& get_objective_coefficients()                = 0;
+  virtual f_t get_objective_scaling_factor() const                              = 0;
+  virtual f_t get_objective_offset() const                                      = 0;
+  virtual const rmm::device_uvector<f_t>& get_variable_lower_bounds() const     = 0;
+  virtual rmm::device_uvector<f_t>& get_variable_lower_bounds()                 = 0;
+  virtual const rmm::device_uvector<f_t>& get_variable_upper_bounds() const     = 0;
+  virtual rmm::device_uvector<f_t>& get_variable_upper_bounds()                 = 0;
+  virtual const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const   = 0;
+  virtual rmm::device_uvector<f_t>& get_constraint_lower_bounds()               = 0;
+  virtual const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const   = 0;
+  virtual rmm::device_uvector<f_t>& get_constraint_upper_bounds()               = 0;
+  virtual const rmm::device_uvector<char>& get_row_types() const                = 0;
+  virtual const rmm::device_uvector<var_t>& get_variable_types() const          = 0;
+  virtual bool get_sense() const                                                = 0;
+  virtual bool empty() const                                                    = 0;
+  virtual std::string get_objective_name() const                                = 0;
+  virtual std::string get_problem_name() const                                  = 0;
+  virtual problem_category_t get_problem_category() const                       = 0;
+  virtual const std::vector<std::string>& get_variable_names() const            = 0;
+  virtual const std::vector<std::string>& get_row_names() const                 = 0;
+  virtual const std::vector<i_t>& get_quadratic_objective_offsets() const       = 0;
+  virtual const std::vector<i_t>& get_quadratic_objective_indices() const       = 0;
+  virtual const std::vector<f_t>& get_quadratic_objective_values() const        = 0;
+  virtual bool has_quadratic_objective() const                                  = 0;
+
+  // ============================================================================
+  // Conversion
+  // ============================================================================
+
+  /**
+   * @brief Convert to optimization_problem_t (moves data to GPU if needed)
+   * @return optimization_problem_t<i_t, f_t> GPU-backed optimization problem
+   */
+  virtual optimization_problem_t<i_t, f_t> to_optimization_problem() = 0;
+
+  // ============================================================================
+  // Getters - Host memory (CPU), std::vector returned by value
+  // ============================================================================
+
+  /**
+   * @brief Get constraint matrix values in host memory.
+   * @return std::vector containing the constraint matrix values
+   */
+  virtual std::vector<f_t> get_constraint_matrix_values_host() const = 0;
+
+  /**
+   * @brief Get constraint matrix indices in host memory.
+   * @return std::vector containing the constraint matrix indices
+   */
+  virtual std::vector<i_t> get_constraint_matrix_indices_host() const = 0;
+
+  /**
+   * @brief Get constraint matrix offsets in host memory.
+   * @return std::vector containing the constraint matrix offsets
+   */
+  virtual std::vector<i_t> get_constraint_matrix_offsets_host() const = 0;
+
+  /**
+   * @brief Get constraint bounds in host memory.
+   * @return std::vector containing the constraint bounds
+   */
+  virtual std::vector<f_t> get_constraint_bounds_host() const = 0;
+
+  /**
+   * @brief Get objective coefficients in host memory.
+   * @return std::vector containing the objective coefficients
+   */
+  virtual std::vector<f_t> get_objective_coefficients_host() const = 0;
+
+  /**
+   * @brief Get variable lower bounds in host memory.
+   * @return std::vector containing the variable lower bounds
+   */
+  virtual std::vector<f_t> get_variable_lower_bounds_host() const = 0;
+
+  /**
+   * @brief Get variable upper bounds in host memory.
+   * @return std::vector containing the variable upper bounds
+   */
+  virtual std::vector<f_t> get_variable_upper_bounds_host() const = 0;
+
+  /**
+   * @brief Get constraint lower bounds in host memory.
+   * @return std::vector containing the constraint lower bounds
+   */
+  virtual std::vector<f_t> get_constraint_lower_bounds_host() const = 0;
+
+  /**
+   * @brief Get constraint upper bounds in host memory.
+   * @return std::vector containing the constraint upper bounds
+   */
+  virtual std::vector<f_t> get_constraint_upper_bounds_host() const = 0;
+
+  /**
+   * @brief Get row types in host memory.
+   * @return std::vector containing the row types
+   */
+  virtual std::vector<char> get_row_types_host() const = 0;
+
+  /**
+   * @brief Get variable types in host memory.
+   * @return std::vector containing the variable types
+   */
+  virtual std::vector<var_t> get_variable_types_host() const = 0;
+
+  // ============================================================================
+  // File I/O
+  // ============================================================================
+
+  /**
+   * @brief Write the optimization problem to an MPS file.
+   * @param[in] mps_file_path Path to the output MPS file
+   */
+  virtual void write_to_mps(const std::string& mps_file_path) = 0;
+
+  // ============================================================================
+  // Comparison
+  // ============================================================================
+
+  /**
+   * @brief Check if this problem is equivalent to another problem.
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
+   */
+  virtual bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const = 0;
+
+  // ============================================================================
+  // Remote Execution (Polymorphic Dispatch)
+  // ============================================================================
+
+  /**
+   * @brief Solve LP problem using remote execution (polymorphic)
+   * This method dispatches to the appropriate solve_lp_remote overload based on
+   * the concrete type (GPU or CPU).
+   * @param[in] settings PDLP solver settings
+   * @return Pointer to solution interface
+   */
+  virtual std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+    pdlp_solver_settings_t<i_t, f_t> const& settings) const = 0;
+
+  /**
+   * @brief Solve MIP problem using remote execution (polymorphic)
+   * This method dispatches to the appropriate solve_mip_remote overload based on
+   * the concrete type (GPU or CPU).
+   * @param[in] settings MIP solver settings
+   * @return Pointer to solution interface
+   */
+  virtual std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+    mip_solver_settings_t<i_t, f_t> const& settings) const = 0;
+
+  // ============================================================================
+  // C API Support: Copy to Host (Polymorphic)
+  // ============================================================================
+
+  /**
+   * @brief Copy objective coefficients to host memory (polymorphic)
+   * GPU implementation: cudaMemcpy from device to host
+   * CPU implementation: std::copy from host vector
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of elements to copy
+   */
+  virtual void copy_objective_coefficients_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy constraint matrix to host memory (polymorphic)
+   * @param[out] values Output buffer for matrix values
+   * @param[out] indices Output buffer for column indices
+   * @param[out] offsets Output buffer for row offsets
+   * @param[in] num_values Number of non-zero values
+   * @param[in] num_indices Number of indices (should equal num_values)
+   * @param[in] num_offsets Number of row offsets (num_constraints + 1)
+   */
+  virtual void copy_constraint_matrix_to_host(f_t* values,
+                                              i_t* indices,
+                                              i_t* offsets,
+                                              i_t num_values,
+                                              i_t num_indices,
+                                              i_t num_offsets) const = 0;
+
+  /**
+   * @brief Copy constraint sense/row types to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of constraints
+   */
+  virtual void copy_row_types_to_host(char* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy constraint bounds (RHS) to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of constraints
+   */
+  virtual void copy_constraint_bounds_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy constraint lower bounds to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of constraints
+   */
+  virtual void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy constraint upper bounds to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of constraints
+   */
+  virtual void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy variable lower bounds to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of variables
+   */
+  virtual void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy variable upper bounds to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of variables
+   */
+  virtual void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const = 0;
+
+  /**
+   * @brief Copy variable types to host memory (polymorphic)
+   * @param[out] output Pointer to host memory buffer
+   * @param[in] size Number of variables
+   */
+  virtual void copy_variable_types_to_host(var_t* output, i_t size) const = 0;
+};
+
+// ==============================================================================
+// GPU Implementation
+// ==============================================================================
+
+/**
+ * @brief GPU-based implementation of optimization_problem_interface_t.
+ *
+ * Problem arrays are held in rmm::device_uvector members; the Q_* CSR arrays,
+ * names and scalar attributes are ordinary host members. Device getters return
+ * rmm::device_uvector references and host getters return std::vector by value.
+ */
+template <typename i_t, typename f_t>
+class gpu_optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
+ public:
+  explicit gpu_optimization_problem_t(raft::handle_t const* handle_ptr);
+
+  // Setters
+  void set_maximize(bool maximize) override;
+  void set_csr_constraint_matrix(const f_t* A_values,
+                                 i_t size_values,
+                                 const i_t* A_indices,
+                                 i_t size_indices,
+                                 const i_t* A_offsets,
+                                 i_t size_offsets) override;
+  void set_constraint_bounds(const f_t* b, i_t size) override;
+  void set_objective_coefficients(const f_t* c, i_t size) override;
+  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
+  void set_objective_offset(f_t objective_offset) override;
+  void set_quadratic_objective_matrix(const f_t* Q_values,
+                                      i_t size_values,
+                                      const i_t* Q_indices,
+                                      i_t size_indices,
+                                      const i_t* Q_offsets,
+                                      i_t size_offsets,
+                                      bool validate_positive_semi_definite = false) override;
+  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
+  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
+  void set_variable_types(const var_t* variable_types, i_t size) override;
+  void set_problem_category(const problem_category_t& category) override;
+  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
+  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
+  void set_row_types(const char* row_types, i_t size) override;
+  void set_objective_name(const std::string& objective_name) override;
+  void set_problem_name(const std::string& problem_name) override;
+  void set_variable_names(const std::vector<std::string>& variable_names) override;
+  void set_row_names(const std::vector<std::string>& row_names) override;
+
+  // Size/metadata getters and device-memory getters
+  i_t get_n_variables() const override;
+  i_t get_n_constraints() const override;
+  i_t get_nnz() const override;
+  i_t get_n_integers() const override;
+  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
+  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
+  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_bounds() override;
+  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
+  rmm::device_uvector<f_t>& get_objective_coefficients() override;
+  f_t get_objective_scaling_factor() const override;
+  f_t get_objective_offset() const override;
+  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
+  const rmm::device_uvector<char>& get_row_types() const override;
+  const rmm::device_uvector<var_t>& get_variable_types() const override;
+  bool get_sense() const override;
+  bool empty() const override;
+  std::string get_objective_name() const override;
+  std::string get_problem_name() const override;
+  problem_category_t get_problem_category() const override;
+  const std::vector<std::string>& get_variable_names() const override;
+  const std::vector<std::string>& get_row_names() const override;
+  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
+  const std::vector<i_t>& get_quadratic_objective_indices() const override;
+  const std::vector<f_t>& get_quadratic_objective_values() const override;
+  bool has_quadratic_objective() const override;
+
+  // Host getters (std::vector returned by value)
+  std::vector<f_t> get_constraint_matrix_values_host() const override;
+  std::vector<i_t> get_constraint_matrix_indices_host() const override;
+  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
+  std::vector<f_t> get_constraint_bounds_host() const override;
+  std::vector<f_t> get_objective_coefficients_host() const override;
+  std::vector<f_t> get_variable_lower_bounds_host() const override;
+  std::vector<f_t> get_variable_upper_bounds_host() const override;
+  std::vector<f_t> get_constraint_lower_bounds_host() const override;
+  std::vector<f_t> get_constraint_upper_bounds_host() const override;
+  std::vector<char> get_row_types_host() const override;
+  std::vector<var_t> get_variable_types_host() const override;
+
+  /**
+   * @brief Convert this GPU optimization problem to an optimization_problem_t
+   *        by moving GPU data ownership (zero-copy transfer).
+   * @return optimization_problem_t with ownership of all GPU data
+   */
+  optimization_problem_t<i_t, f_t> to_optimization_problem() override;
+
+  /**
+   * @brief Write the optimization problem to an MPS file.
+   * @param[in] mps_file_path Path to the output MPS file
+   */
+  void write_to_mps(const std::string& mps_file_path) override;
+
+  /**
+   * @brief Check if this problem is equivalent to another problem.
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
+   */
+  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
+
+  // Remote execution (polymorphic dispatch)
+  std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+    pdlp_solver_settings_t<i_t, f_t> const& settings) const override;
+
+  std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+    mip_solver_settings_t<i_t, f_t> const& settings) const override;
+
+  // C API support: Copy to host (polymorphic)
+  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_matrix_to_host(f_t* values,
+                                      i_t* indices,
+                                      i_t* offsets,
+                                      i_t num_values,
+                                      i_t num_indices,
+                                      i_t num_offsets) const override;
+  void copy_row_types_to_host(char* output, i_t size) const override;
+  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_types_to_host(var_t* output, i_t size) const override;
+
+  raft::handle_t const* get_handle_ptr() const noexcept;
+
+ private:
+  raft::handle_t const* handle_ptr_{nullptr};
+  rmm::cuda_stream_view stream_view_;
+
+  problem_category_t problem_category_ = problem_category_t::LP;
+  bool maximize_{false};
+  i_t n_vars_{0};
+  i_t n_constraints_{0};
+
+  // GPU memory storage
+  rmm::device_uvector<f_t> A_;
+  rmm::device_uvector<i_t> A_indices_;
+  rmm::device_uvector<i_t> A_offsets_;
+  rmm::device_uvector<f_t> b_;
+  rmm::device_uvector<f_t> c_;
+  f_t objective_scaling_factor_{1};
+  f_t objective_offset_{0};
+
+  std::vector<i_t> Q_offsets_;  // Q_* are host-side std::vector, unlike the device arrays
+  std::vector<i_t> Q_indices_;
+  std::vector<f_t> Q_values_;
+
+  rmm::device_uvector<f_t> variable_lower_bounds_;
+  rmm::device_uvector<f_t> variable_upper_bounds_;
+  rmm::device_uvector<f_t> constraint_lower_bounds_;
+  rmm::device_uvector<f_t> constraint_upper_bounds_;
+  rmm::device_uvector<char> row_types_;
+  rmm::device_uvector<var_t> variable_types_;
+
+  std::string objective_name_;
+  std::string problem_name_;
+  std::vector<std::string> var_names_{};
+  std::vector<std::string> row_names_{};
+};
+
+// ==============================================================================
+// CPU Implementation
+// ==============================================================================
+
+/**
+ * @brief CPU-based implementation of optimization_problem_interface_t.
+ *
+ * This implementation stores all data in CPU memory using std::vector.
+ * It only implements host getters (returning std::vector by value).
+ * Device getters throw exceptions as GPU memory access is not supported.
+ */
+template <typename i_t, typename f_t>
+class cpu_optimization_problem_t : public optimization_problem_interface_t<i_t, f_t> {
+ public:
+  explicit cpu_optimization_problem_t(raft::handle_t const* handle_ptr = nullptr);
+
+  // Setters
+  void set_maximize(bool maximize) override;
+  void set_csr_constraint_matrix(const f_t* A_values,
+                                 i_t size_values,
+                                 const i_t* A_indices,
+                                 i_t size_indices,
+                                 const i_t* A_offsets,
+                                 i_t size_offsets) override;
+  void set_constraint_bounds(const f_t* b, i_t size) override;
+  void set_objective_coefficients(const f_t* c, i_t size) override;
+  void set_objective_scaling_factor(f_t objective_scaling_factor) override;
+  void set_objective_offset(f_t objective_offset) override;
+  void set_quadratic_objective_matrix(const f_t* Q_values,
+                                      i_t size_values,
+                                      const i_t* Q_indices,
+                                      i_t size_indices,
+                                      const i_t* Q_offsets,
+                                      i_t size_offsets,
+                                      bool validate_positive_semi_definite = false) override;
+  void set_variable_lower_bounds(const f_t* variable_lower_bounds, i_t size) override;
+  void set_variable_upper_bounds(const f_t* variable_upper_bounds, i_t size) override;
+  void set_variable_types(const var_t* variable_types, i_t size) override;
+  void set_problem_category(const problem_category_t& category) override;
+  void set_constraint_lower_bounds(const f_t* constraint_lower_bounds, i_t size) override;
+  void set_constraint_upper_bounds(const f_t* constraint_upper_bounds, i_t size) override;
+  void set_row_types(const char* row_types, i_t size) override;
+  void set_objective_name(const std::string& objective_name) override;
+  void set_problem_name(const std::string& problem_name) override;
+  void set_variable_names(const std::vector<std::string>& variable_names) override;
+  void set_row_names(const std::vector<std::string>& row_names) override;
+
+  // Device getters - throw exceptions (not supported for CPU implementation)
+  i_t get_n_variables() const override;
+  i_t get_n_constraints() const override;
+  i_t get_nnz() const override;
+  i_t get_n_integers() const override;
+  const rmm::device_uvector<f_t>& get_constraint_matrix_values() const override;
+  rmm::device_uvector<f_t>& get_constraint_matrix_values() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_indices() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_indices() override;
+  const rmm::device_uvector<i_t>& get_constraint_matrix_offsets() const override;
+  rmm::device_uvector<i_t>& get_constraint_matrix_offsets() override;
+  const rmm::device_uvector<f_t>& get_constraint_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_bounds() override;
+  const rmm::device_uvector<f_t>& get_objective_coefficients() const override;
+  rmm::device_uvector<f_t>& get_objective_coefficients() override;
+  f_t get_objective_scaling_factor() const override;
+  f_t get_objective_offset() const override;
+  const rmm::device_uvector<f_t>& get_variable_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_variable_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_variable_upper_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_lower_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_lower_bounds() override;
+  const rmm::device_uvector<f_t>& get_constraint_upper_bounds() const override;
+  rmm::device_uvector<f_t>& get_constraint_upper_bounds() override;
+  const rmm::device_uvector<char>& get_row_types() const override;
+  const rmm::device_uvector<var_t>& get_variable_types() const override;
+  bool get_sense() const override;
+  bool empty() const override;
+  std::string get_objective_name() const override;
+  std::string get_problem_name() const override;
+  problem_category_t get_problem_category() const override;
+  const std::vector<std::string>& get_variable_names() const override;
+  const std::vector<std::string>& get_row_names() const override;
+  const std::vector<i_t>& get_quadratic_objective_offsets() const override;
+  const std::vector<i_t>& get_quadratic_objective_indices() const override;
+  const std::vector<f_t>& get_quadratic_objective_values() const override;
+  bool has_quadratic_objective() const override;
+
+  // Host getters - these are the only supported getters for CPU implementation
+  std::vector<f_t> get_constraint_matrix_values_host() const override;
+  std::vector<i_t> get_constraint_matrix_indices_host() const override;
+  std::vector<i_t> get_constraint_matrix_offsets_host() const override;
+  std::vector<f_t> get_constraint_bounds_host() const override;
+  std::vector<f_t> get_objective_coefficients_host() const override;
+  std::vector<f_t> get_variable_lower_bounds_host() const override;
+  std::vector<f_t> get_variable_upper_bounds_host() const override;
+  std::vector<f_t> get_constraint_lower_bounds_host() const override;
+  std::vector<f_t> get_constraint_upper_bounds_host() const override;
+  std::vector<char> get_row_types_host() const override;
+  std::vector<var_t> get_variable_types_host() const override;
+
+  /**
+   * @brief Set the CUDA handle for GPU operations
+   *
+   * This is used in test mode when a CPU problem needs to be converted to GPU
+   * for local solving. The handle must be set before calling to_optimization_problem().
+   *
+   * @param handle_ptr Pointer to the RAFT handle with CUDA resources
+   */
+  void set_handle(raft::handle_t const* handle_ptr) { handle_ptr_ = handle_ptr; }
+
+  /**
+   * @brief Convert this CPU optimization problem to an optimization_problem_t
+   *        by copying CPU data to GPU (requires GPU memory transfer).
+   * @return optimization_problem_t with all data copied to GPU
+   */
+  optimization_problem_t<i_t, f_t> to_optimization_problem();
+
+  /**
+   * @brief Write the optimization problem to an MPS file.
+   * @param[in] mps_file_path Path to the output MPS file
+   */
+  void write_to_mps(const std::string& mps_file_path) override;
+
+  /**
+   * @brief Check if this problem is equivalent to another problem.
+   * @param[in] other The other optimization problem to compare against
+   * @return true if the problems are equivalent (up to permutation of variables/constraints)
+   */
+  bool is_equivalent(const optimization_problem_interface_t<i_t, f_t>& other) const override;
+
+  // Remote execution (polymorphic dispatch)
+  std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+    pdlp_solver_settings_t<i_t, f_t> const& settings) const override;
+
+  std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+    mip_solver_settings_t<i_t, f_t> const& settings) const override;
+
+  // C API support: Copy to host (polymorphic)
+  void copy_objective_coefficients_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_matrix_to_host(f_t* values,
+                                      i_t* indices,
+                                      i_t* offsets,
+                                      i_t num_values,
+                                      i_t num_indices,
+                                      i_t num_offsets) const override;
+  void copy_row_types_to_host(char* output, i_t size) const override;
+  void copy_constraint_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_constraint_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_lower_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_upper_bounds_to_host(f_t* output, i_t size) const override;
+  void copy_variable_types_to_host(var_t* output, i_t size) const override;
+
+ private:
+  raft::handle_t const* handle_ptr_{nullptr};
+
+  problem_category_t problem_category_ = problem_category_t::LP;
+  bool maximize_{false};
+  i_t n_vars_{0};
+  i_t n_constraints_{0};
+
+  // CPU memory storage
+  std::vector<f_t> A_;
+  std::vector<i_t> A_indices_;
+  std::vector<i_t> A_offsets_;
+  std::vector<f_t> b_;
+  std::vector<f_t> c_;
+  f_t objective_scaling_factor_{1};
+  f_t objective_offset_{0};
+
+  std::vector<i_t> Q_offsets_;
+  std::vector<i_t> Q_indices_;
+  std::vector<f_t> Q_values_;
+
+  std::vector<f_t> variable_lower_bounds_;
+  std::vector<f_t> variable_upper_bounds_;
+  std::vector<f_t> constraint_lower_bounds_;
+  std::vector<f_t> constraint_upper_bounds_;
+  std::vector<char> row_types_;
+  std::vector<var_t> variable_types_;
+
+  std::string objective_name_;
+  std::string problem_name_;
+  std::vector<std::string> var_names_{};
+  std::vector<std::string> row_names_{};
+};
+
+// ============================================================================
+// Backend Selection Utilities
+// ============================================================================
+
+/**
+ * @brief Execution mode of a solve: on this machine (LOCAL) or on a remote server (REMOTE)
+ */
+enum class execution_mode_t {
+  LOCAL,  ///< Solve locally on this machine
+  REMOTE  ///< Solve remotely via gRPC
+};
+
+/**
+ * @brief Memory backend holding problem/solution data: GPU (device) or CPU (host) memory
+ */
+enum class memory_backend_t {
+  GPU,  ///< Use GPU memory (device memory via RMM)
+  CPU   ///< Use CPU memory (host memory)
+};
+
+/**
+ * @brief Check if remote execution is enabled via environment variables
+ * @return true if both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set
+ */
+bool is_remote_execution_enabled();
+
+/**
+ * @brief Determine execution mode based on environment variables
+ *
+ * @return execution_mode_t::REMOTE if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set,
+ *         execution_mode_t::LOCAL otherwise
+ */
+execution_mode_t get_execution_mode();
+
+/**
+ * @brief Check if GPU memory should be used for remote execution
+ * @return true if CUOPT_USE_GPU_MEM_FOR_REMOTE is set to "true" or "1" (case-insensitive)
+ */
+bool use_gpu_memory_for_remote();
+
+/**
+ * @brief Check if CPU memory should be used for local execution (test mode)
+ *
+ * This is intended for testing CPU problem/solution structures without remote execution.
+ * When enabled, local solve will convert CPU problems to GPU, solve, and convert back.
+ *
+ * @return true if CUOPT_USE_CPU_MEM_FOR_LOCAL is set to "true" or "1" (case-insensitive)
+ */
+bool use_cpu_memory_for_local();
+
+/**
+ * @brief Determine which memory backend to use based on execution mode
+ *
+ * Logic (the env toggles are the ones read by the two use_*_memory_for_*() helpers above):
+ *   - LOCAL execution  -> GPU memory by default, CPU if CUOPT_USE_CPU_MEM_FOR_LOCAL is on (test mode)
+ *   - REMOTE execution -> CPU memory by default, GPU if CUOPT_USE_GPU_MEM_FOR_REMOTE is on
+ *
+ * @return memory_backend_t::GPU or memory_backend_t::CPU
+ */
+memory_backend_t get_memory_backend_type();
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp
new file mode 100644
index 000000000..88b082497
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp
@@ -0,0 +1,514 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/constants.h>
+#include <cuopt/error.hpp>
+#include <cuopt/linear_programming/mip/solver_solution.hpp>  // For mip_termination_status_t
+#include <cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/pdlp/solver_solution.hpp>  // For pdlp_termination_status_t
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <cuopt/linear_programming/utilities/cython_types.hpp>
+
+#include <stdexcept>
+#include <string>
+#include <variant>
+#include <vector>
+
+namespace cuopt::linear_programming {
+
+// Forward declarations
+template <typename i_t, typename f_t>
+class optimization_problem_solution_t;
+template <typename i_t, typename f_t>
+class mip_solution_t;
+
+/**
+ * @brief Abstract interface for optimization problem solutions (LP and MIP)
+ *
+ * This interface allows for CPU or GPU-backed solution storage.
+ * - GPU-backed implementations (gpu_lp_solution_t, gpu_mip_solution_t): rmm::device_uvector storage
+ * - CPU-backed implementations (cpu_lp_solution_t, cpu_mip_solution_t): std::vector (host) storage
+ *
+ * @tparam i_t Integer type for indices
+ * @tparam f_t Floating point type for values
+ */
+template <typename i_t, typename f_t>
+class optimization_problem_solution_interface_t {
+ public:
+  virtual ~optimization_problem_solution_interface_t() = default;
+
+  /**
+   * @brief Check if this is a MIP solution or LP solution
+   * @return true if MIP, false if LP
+   */
+  virtual bool is_mip() const = 0;
+
+  /**
+   * @brief Get the error status
+   * @return The error status
+   */
+  virtual cuopt::logic_error get_error_status() const = 0;
+
+  /**
+   * @brief Get the solve time in seconds
+   * @return Time in seconds (always double, independent of f_t; overriders must return double)
+   */
+  virtual double get_solve_time() const = 0;
+
+  /**
+   * @brief Get solution (variable values) as host vector
+   * @return Host vector of variable values
+   * @note For LP solutions, this returns the primal solution.
+   *       For MIP solutions, this returns the integer solution.
+   *       Provides uniform access to variable values regardless of problem type.
+   */
+  virtual const std::vector<f_t>& get_solution_host() const = 0;
+
+  /**
+   * @brief Get termination status as integer
+   * @return Termination status cast to int
+   * @note Provides uniform access to termination status regardless of problem type.
+   *       LP returns pdlp_termination_status_t cast to int.
+   *       MIP returns mip_termination_status_t cast to int.
+   *       Both use the same CUOPT_TERMINATION_STATUS_* integer constants.
+   */
+  virtual int get_termination_status_int() const = 0;
+
+  // ============================================================================
+  // Cross-type methods: These allow polymorphic access from C API.
+  // LP-only / MIP-only methods throw std::logic_error if called on the wrong solution type.
+  // ============================================================================
+
+  /**
+   * @brief Get objective value (unified interface)
+   * @return Objective value
+   * @note LP: Returns primal objective (id=0). MIP: Returns best objective.
+   * @note Unlike the type-specific getters below, this is valid for both LP and MIP solutions.
+   */
+  virtual f_t get_objective_value() const = 0;
+
+  /**
+   * @brief Get MIP gap (MIP-only)
+   * @return MIP gap
+   * @throws std::logic_error if called on LP solution
+   */
+  virtual f_t get_mip_gap() const = 0;
+
+  /**
+   * @brief Get solution bound (MIP-only)
+   * @return Solution bound
+   * @throws std::logic_error if called on LP solution
+   */
+  virtual f_t get_solution_bound() const = 0;
+
+  /**
+   * @brief Get dual solution as host vector (LP-only)
+   * @return Host vector of dual solution
+   * @throws std::logic_error if called on MIP solution
+   */
+  virtual const std::vector<f_t>& get_dual_solution() const = 0;
+
+  /**
+   * @brief Get dual objective value (LP-only)
+   * @return Dual objective value
+   * @throws std::logic_error if called on MIP solution
+   */
+  virtual f_t get_dual_objective_value() const = 0;
+
+  /**
+   * @brief Get reduced costs as host vector (LP-only)
+   * @return Host vector of reduced costs
+   * @throws std::logic_error if called on MIP solution
+   */
+  virtual const std::vector<f_t>& get_reduced_costs() const = 0;
+};
+
+/**
+ * @brief Interface for LP/PDLP solutions (LP side of optimization_problem_solution_interface_t)
+ */
+template <typename i_t, typename f_t>
+class lp_solution_interface_t : public optimization_problem_solution_interface_t<i_t, f_t> {
+ public:
+  bool is_mip() const override { return false; }
+
+  /**
+   * @brief Get the primal solution size
+   * @return Number of variables
+   */
+  virtual i_t get_primal_solution_size() const = 0;
+
+  /**
+   * @brief Get the dual solution size
+   * @return Number of constraints
+   */
+  virtual i_t get_dual_solution_size() const = 0;
+
+  /**
+   * @brief Get the reduced cost size
+   * @return Number of variables
+   */
+  virtual i_t get_reduced_cost_size() const = 0;
+
+  /**
+   * @brief Get primal solution as host vector
+   * @return Host vector of primal solution
+   */
+  virtual const std::vector<f_t>& get_primal_solution_host() const = 0;
+
+  /**
+   * @brief Get solution as host vector (wraps get_primal_solution_host)
+   * @return Host vector of variable values
+   * @note For LP, this returns the primal solution (variable values).
+   *       Provides interface parallelism with mip_solution_interface_t.
+   */
+  const std::vector<f_t>& get_solution_host() const override { return get_primal_solution_host(); }
+
+  // ============================================================================
+  // Base interface implementations for cross-type polymorphic access
+  // ============================================================================
+
+  /**
+   * @brief Get objective value (base interface implementation); delegates to id 0.
+   * The indexed overload has no default argument, so this zero-argument call is unambiguous.
+   */
+  f_t get_objective_value() const override { return get_objective_value(0); }
+
+  /**
+   * @brief MIP gap - not available for LP solutions
+   * @throws std::logic_error always
+   */
+  f_t get_mip_gap() const override
+  {
+    throw std::logic_error("get_mip_gap() is not available for LP solutions");
+  }
+
+  /**
+   * @brief Solution bound - not available for LP solutions
+   * @throws std::logic_error always
+   */
+  f_t get_solution_bound() const override
+  {
+    throw std::logic_error("get_solution_bound() is not available for LP solutions");
+  }
+
+  /**
+   * @brief Get dual solution (base interface implementation)
+   * Delegates to get_dual_solution_host()
+   */
+  const std::vector<f_t>& get_dual_solution() const override { return get_dual_solution_host(); }
+
+  /**
+   * @brief Get dual objective value (base interface implementation); delegates to id 0.
+   * The indexed overload has no default argument, so this zero-argument call is unambiguous.
+   */
+  f_t get_dual_objective_value() const override { return get_dual_objective_value(0); }
+
+  /**
+   * @brief Get reduced costs (base interface implementation)
+   * Delegates to get_reduced_cost_host()
+   */
+  const std::vector<f_t>& get_reduced_costs() const override { return get_reduced_cost_host(); }
+
+  // ============================================================================
+  // LP-specific methods
+  // ============================================================================
+
+  /**
+   * @brief Get dual solution as host vector
+   * @return Host vector of dual solution
+   */
+  virtual const std::vector<f_t>& get_dual_solution_host() const = 0;
+
+  /**
+   * @brief Get reduced cost as host vector
+   * @return Host vector of reduced costs
+   */
+  virtual const std::vector<f_t>& get_reduced_cost_host() const = 0;
+
+  /**
+   * @brief Get solve time (same signature as the base declaration: double for every f_t)
+   * @return Total solve time in seconds
+   */
+  double get_solve_time() const override = 0;
+
+  /**
+   * @brief Get primal objective value for the given id (use get_objective_value() for id 0)
+   * @return Primal objective value
+   */
+  virtual f_t get_objective_value(i_t id) const = 0;
+
+  /**
+   * @brief Get dual objective value for the given id (use get_dual_objective_value() for id 0)
+   * @return Dual objective value
+   */
+  virtual f_t get_dual_objective_value(i_t id) const = 0;
+
+  /**
+   * @brief Get termination status
+   * @return Termination status
+   */
+  virtual pdlp_termination_status_t get_termination_status(i_t id = 0) const = 0;
+
+  /**
+   * @brief Get termination status as integer (implements base interface)
+   * @return pdlp_termination_status_t (for the default id 0) cast to int
+   */
+  int get_termination_status_int() const override
+  {
+    return static_cast<int>(get_termination_status());
+  }
+
+  /**
+   * @brief Get L2 primal residual
+   * @return L2 primal residual
+   */
+  virtual f_t get_l2_primal_residual(i_t id = 0) const = 0;
+
+  /**
+   * @brief Get L2 dual residual
+   * @return L2 dual residual
+   */
+  virtual f_t get_l2_dual_residual(i_t id = 0) const = 0;
+
+  /**
+   * @brief Get gap
+   * @return Gap value
+   */
+  virtual f_t get_gap(i_t id = 0) const = 0;
+
+  /**
+   * @brief Get number of iterations
+   * @return Number of iterations
+   */
+  virtual i_t get_num_iterations(i_t id = 0) const = 0;
+
+  /**
+   * @brief Check if solved by PDLP
+   * @return true if solved by PDLP
+   */
+  virtual bool is_solved_by_pdlp(i_t id = 0) const = 0;
+
+  /**
+   * @brief Get PDLP warm start data (GPU solutions only)
+   * @return Reference to warm start data
+   * @note GPU solutions only - throws for CPU solutions
+   */
+  virtual const pdlp_warm_start_data_t<i_t, f_t>& get_pdlp_warm_start_data() const = 0;
+
+  /**
+   * @brief Check if warm start data is available
+   * @return true if warm start data is available, false otherwise
+   */
+  virtual bool has_warm_start_data() const = 0;
+
+  // Individual warm start data accessors (work for both GPU and CPU)
+  // Return empty vectors if no warm start data available
+  virtual std::vector<f_t> get_current_primal_solution_host() const                  = 0;
+  virtual std::vector<f_t> get_current_dual_solution_host() const                    = 0;
+  virtual std::vector<f_t> get_initial_primal_average_host() const                   = 0;
+  virtual std::vector<f_t> get_initial_dual_average_host() const                     = 0;
+  virtual std::vector<f_t> get_current_ATY_host() const                              = 0;
+  virtual std::vector<f_t> get_sum_primal_solutions_host() const                     = 0;
+  virtual std::vector<f_t> get_sum_dual_solutions_host() const                       = 0;
+  virtual std::vector<f_t> get_last_restart_duality_gap_primal_solution_host() const = 0;
+  virtual std::vector<f_t> get_last_restart_duality_gap_dual_solution_host() const   = 0;
+  virtual f_t get_initial_primal_weight() const                                      = 0;
+  virtual f_t get_initial_step_size() const                                          = 0;
+  virtual i_t get_total_pdlp_iterations() const                                      = 0;
+  virtual i_t get_total_pdhg_iterations() const                                      = 0;
+  virtual f_t get_last_candidate_kkt_score() const                                   = 0;
+  virtual f_t get_last_restart_kkt_score() const                                     = 0;
+  virtual f_t get_sum_solution_weight() const                                        = 0;
+  virtual i_t get_iterations_since_last_restart() const                              = 0;
+
+  /**
+   * @brief Convert to optimization_problem_solution_t (GPU-backed)
+   * This is used for remote execution: CPU solution -> GPU solution for return
+   * @param stream_view CUDA stream for device allocations
+   * @return GPU-backed solution
+   */
+  virtual optimization_problem_solution_t<i_t, f_t> to_gpu_solution(
+    rmm::cuda_stream_view stream_view) = 0;
+
+  /**
+   * @brief Convert to Python/Cython return type (polymorphic version); callable on rvalues only
+   * This method allows backend-agnostic conversion to Python return structs.
+   * GPU solutions return linear_programming_ret_t (device_buffer-backed).
+   * CPU solutions return cpu_linear_programming_ret_t (std::vector-backed).
+   * @return Variant containing either GPU or CPU return struct
+   */
+  virtual std::variant<cuopt::cython::linear_programming_ret_t,
+                       cuopt::cython::cpu_linear_programming_ret_t>
+  to_python_lp_ret() && = 0;
+};
+
+/**
+ * @brief Interface for MIP solutions (MIP side of optimization_problem_solution_interface_t)
+ */
+template <typename i_t, typename f_t>
+class mip_solution_interface_t : public optimization_problem_solution_interface_t<i_t, f_t> {
+ public:
+  bool is_mip() const override { return true; }
+
+  // ============================================================================
+  // Base interface implementations for cross-type polymorphic access
+  // LP-only methods throw exceptions when called on MIP solutions
+  // ============================================================================
+
+  /**
+   * @brief Dual solution - not available for MIP solutions
+   * @throws std::logic_error always
+   */
+  const std::vector<f_t>& get_dual_solution() const override
+  {
+    throw std::logic_error("get_dual_solution() is not available for MIP solutions");
+  }
+
+  /**
+   * @brief Dual objective value - not available for MIP solutions
+   * @throws std::logic_error always
+   */
+  f_t get_dual_objective_value() const override
+  {
+    throw std::logic_error("get_dual_objective_value() is not available for MIP solutions");
+  }
+
+  /**
+   * @brief Reduced costs - not available for MIP solutions
+   * @throws std::logic_error always
+   */
+  const std::vector<f_t>& get_reduced_costs() const override
+  {
+    throw std::logic_error("get_reduced_costs() is not available for MIP solutions");
+  }
+
+  // ============================================================================
+  // MIP-specific methods
+  // ============================================================================
+
+  /**
+   * @brief Get the solution size
+   * @return Number of variables
+   */
+  virtual i_t get_solution_size() const = 0;
+
+  /**
+   * @brief Get solution as host vector (re-declares the base pure virtual)
+   * @return Host vector of solution
+   */
+  const std::vector<f_t>& get_solution_host() const override = 0;
+
+  /**
+   * @brief Get objective value (re-declares the base pure virtual)
+   * @return Objective value
+   */
+  f_t get_objective_value() const override = 0;
+
+  /**
+   * @brief Get solve time (same signature as the base declaration: double for every f_t)
+   * @return Total solve time in seconds
+   */
+  double get_solve_time() const override = 0;
+
+  /**
+   * @brief Get MIP gap (re-declares the base pure virtual)
+   * @return MIP gap
+   */
+  f_t get_mip_gap() const override = 0;
+
+  /**
+   * @brief Get solution bound (re-declares the base pure virtual)
+   * @return Solution bound
+   */
+  f_t get_solution_bound() const override = 0;
+
+  /**
+   * @brief Get termination status
+   * @return Termination status
+   */
+  virtual mip_termination_status_t get_termination_status() const = 0;
+
+  /**
+   * @brief Get termination status as integer (implements base interface)
+   * @return mip_termination_status_t cast to int
+   */
+  int get_termination_status_int() const override
+  {
+    return static_cast<int>(get_termination_status());
+  }
+
+  /**
+   * @brief Get presolve time
+   * @return Presolve time in seconds
+   */
+  virtual f_t get_presolve_time() const = 0;
+
+  /**
+   * @brief Get max constraint violation
+   * @return Maximum constraint violation
+   */
+  virtual f_t get_max_constraint_violation() const = 0;
+
+  /**
+   * @brief Get max integer violation
+   * @return Maximum integer violation
+   */
+  virtual f_t get_max_int_violation() const = 0;
+
+  /**
+   * @brief Get max variable bound violation
+   * @return Maximum variable bound violation
+   */
+  virtual f_t get_max_variable_bound_violation() const = 0;
+
+  /**
+   * @brief Get number of nodes
+   * @return Number of nodes explored
+   */
+  virtual i_t get_num_nodes() const = 0;
+
+  /**
+   * @brief Get number of simplex iterations
+   * @return Number of simplex iterations
+   */
+  virtual i_t get_num_simplex_iterations() const = 0;
+
+  /**
+   * @brief Convert to mip_solution_t (GPU-backed)
+   * This is used for remote execution: CPU solution -> GPU solution for return
+   * @param stream_view CUDA stream for device allocations
+   * @return GPU-backed solution
+   */
+  virtual mip_solution_t<i_t, f_t> to_gpu_solution(rmm::cuda_stream_view stream_view) = 0;
+
+  /**
+   * @brief Convert to Python/Cython return type (polymorphic version); callable on rvalues only
+   * This method allows backend-agnostic conversion to Python return structs.
+   * GPU solutions return mip_ret_t (device_buffer-backed).
+   * CPU solutions return cpu_mip_ret_t (std::vector-backed).
+   * @return Variant containing either GPU or CPU return struct
+   */
+  virtual std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t>
+  to_python_mip_ret() && = 0;
+};
+
+// Forward declarations of concrete implementations
+template <typename i_t, typename f_t>
+class gpu_lp_solution_t;
+template <typename i_t, typename f_t>
+class cpu_lp_solution_t;
+template <typename i_t, typename f_t>
+class gpu_mip_solution_t;
+template <typename i_t, typename f_t>
+class cpu_mip_solution_t;
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp b/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp
new file mode 100644
index 000000000..519380754
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/optimization_problem_utils.hpp
@@ -0,0 +1,264 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/solver_settings.hpp>
+#include <mps_parser/data_model_view.hpp>
+#include <mps_parser/mps_data_model.hpp>
+
+namespace cuopt::linear_programming {
+
+/**
+ * @brief Populate an optimization_problem_interface_t directly from an mps_data_model_t
+ *
+ * No temporary optimization_problem_t is built: each non-empty field of the data model is handed
+ * to the matching interface setter; empty names/arrays are skipped and their setters not called.
+ *
+ * @tparam i_t Integer type for indices
+ * @tparam f_t Floating point type for values
+ * @param[out] problem The optimization problem interface to populate (must not be null)
+ * @param[in] data_model The MPS data model containing the problem data
+ */
+template <typename i_t, typename f_t>
+void populate_from_mps_data_model(optimization_problem_interface_t<i_t, f_t>* problem,
+                                  const mps_parser::mps_data_model_t<i_t, f_t>& data_model)
+{
+  // Set scalar values
+  problem->set_maximize(data_model.get_sense());
+  problem->set_objective_scaling_factor(data_model.get_objective_scaling_factor());
+  problem->set_objective_offset(data_model.get_objective_offset());
+
+  // Set string values
+  if (!data_model.get_objective_name().empty())
+    problem->set_objective_name(data_model.get_objective_name());
+  if (!data_model.get_problem_name().empty())
+    problem->set_problem_name(data_model.get_problem_name());
+  if (!data_model.get_variable_names().empty())
+    problem->set_variable_names(data_model.get_variable_names());
+  if (!data_model.get_row_names().empty()) problem->set_row_names(data_model.get_row_names());
+
+  // Set array values
+  i_t n_vars        = data_model.get_n_variables();
+  i_t n_constraints = data_model.get_n_constraints();
+
+  const auto& obj_coeffs = data_model.get_objective_coefficients();
+  if (!obj_coeffs.empty()) { problem->set_objective_coefficients(obj_coeffs.data(), n_vars); }
+
+  const auto& A_offsets = data_model.get_constraint_matrix_offsets();
+  if (!A_offsets.empty() && A_offsets.size() > static_cast<size_t>(n_constraints)) {
+    i_t n_nonzeros = A_offsets[n_constraints];
+    if (n_nonzeros > 0) {
+      problem->set_csr_constraint_matrix(data_model.get_constraint_matrix_values().data(),
+                                         n_nonzeros,
+                                         data_model.get_constraint_matrix_indices().data(),
+                                         n_nonzeros,
+                                         A_offsets.data(),
+                                         n_constraints + 1);
+    }
+  }
+
+  const auto& con_bounds = data_model.get_constraint_bounds();
+  if (!con_bounds.empty()) { problem->set_constraint_bounds(con_bounds.data(), n_constraints); }
+  const auto& con_lb = data_model.get_constraint_lower_bounds();
+  if (!con_lb.empty()) { problem->set_constraint_lower_bounds(con_lb.data(), n_constraints); }
+  const auto& con_ub = data_model.get_constraint_upper_bounds();
+  if (!con_ub.empty()) { problem->set_constraint_upper_bounds(con_ub.data(), n_constraints); }
+
+  const auto& row_types = data_model.get_row_types();
+  if (!row_types.empty()) { problem->set_row_types(row_types.data(), n_constraints); }
+
+  const auto& var_lb = data_model.get_variable_lower_bounds();
+  if (!var_lb.empty()) { problem->set_variable_lower_bounds(var_lb.data(), n_vars); }
+
+  const auto& var_ub = data_model.get_variable_upper_bounds();
+  if (!var_ub.empty()) { problem->set_variable_upper_bounds(var_ub.data(), n_vars); }
+
+  // Convert variable types from char to enum
+  const auto& char_variable_types = data_model.get_variable_types();
+  if (!char_variable_types.empty()) {
+    std::vector<var_t> enum_variable_types(char_variable_types.size());
+    for (size_t i = 0; i < char_variable_types.size(); ++i) {
+      const bool is_integer = char_variable_types[i] == 'I' || char_variable_types[i] == 'B';
+      enum_variable_types[i] = is_integer ? var_t::INTEGER : var_t::CONTINUOUS;
+    }
+    // Problem category (LP/MIP/IP) is auto-detected by set_variable_types. Pass the converted
+    // buffer's own length (not n_vars) so the setter can never read past the end of the buffer.
+    problem->set_variable_types(enum_variable_types.data(), enum_variable_types.size());
+  }
+
+  // Handle quadratic objective if present; the offsets array must hold n_vars + 1 entries
+  const auto& Q_offsets = data_model.get_quadratic_objective_offsets();
+  if (data_model.has_quadratic_objective() && Q_offsets.size() > static_cast<size_t>(n_vars)) {
+    i_t q_nonzeros = Q_offsets[n_vars];
+    problem->set_quadratic_objective_matrix(data_model.get_quadratic_objective_values().data(),
+                                            q_nonzeros,
+                                            data_model.get_quadratic_objective_indices().data(),
+                                            q_nonzeros,
+                                            Q_offsets.data(),
+                                            n_vars + 1);
+  }
+}
+
+/**
+ * @brief Helper function to populate optimization_problem_interface_t from data_model_view_t
+ *
+ * Used by the Python Cython interface (which provides data_model_view_t); similar to
+ * populate_from_mps_data_model and also converts any PDLP warmstart to the target backend.
+ *
+ * @tparam i_t Integer type for indices
+ * @tparam f_t Floating point type for values
+ * @param[out] problem The optimization problem interface to populate
+ * @param[in] data_model The data model view containing the problem data
+ * @param[in,out] solver_settings Optional; its PDLP warmstart may be rewritten (GPU <-> CPU)
+ * @param[in] handle Optional RAFT handle; non-null selects the GPU target, null the CPU target
+ */
+template <typename i_t, typename f_t>
+void populate_from_data_model_view(optimization_problem_interface_t<i_t, f_t>* problem,
+                                   cuopt::mps_parser::data_model_view_t<i_t, f_t>* data_model,
+                                   solver_settings_t<i_t, f_t>* solver_settings = nullptr,
+                                   const raft::handle_t* handle                 = nullptr)
+{
+  problem->set_maximize(data_model->get_sense());
+
+  if (data_model->get_constraint_matrix_values().size() != 0 &&
+      data_model->get_constraint_matrix_indices().size() != 0 &&
+      data_model->get_constraint_matrix_offsets().size() != 0) {
+    problem->set_csr_constraint_matrix(data_model->get_constraint_matrix_values().data(),
+                                       data_model->get_constraint_matrix_values().size(),
+                                       data_model->get_constraint_matrix_indices().data(),
+                                       data_model->get_constraint_matrix_indices().size(),
+                                       data_model->get_constraint_matrix_offsets().data(),
+                                       data_model->get_constraint_matrix_offsets().size());
+  }
+
+  if (data_model->get_constraint_bounds().size() != 0) {
+    problem->set_constraint_bounds(data_model->get_constraint_bounds().data(),
+                                   data_model->get_constraint_bounds().size());
+  }
+
+  if (data_model->get_objective_coefficients().size() != 0) {
+    problem->set_objective_coefficients(data_model->get_objective_coefficients().data(),
+                                        data_model->get_objective_coefficients().size());
+  }
+
+  problem->set_objective_scaling_factor(data_model->get_objective_scaling_factor());
+  problem->set_objective_offset(data_model->get_objective_offset());
+
+  // Handle warmstart data with GPU↔CPU conversion if needed
+  if (solver_settings != nullptr) {
+    bool target_is_gpu = (handle != nullptr);
+
+    // Check which warmstart type is populated
+    // Note: Python sets the VIEW (spans), so check both view and data for GPU warmstart
+    // CPU warmstart is set directly in the data structure
+    bool has_gpu_warmstart_view = (solver_settings->get_pdlp_warm_start_data_view()
+                                     .last_restart_duality_gap_dual_solution_.size() > 0);
+    bool has_gpu_warmstart_data =
+      solver_settings->get_pdlp_settings().get_pdlp_warm_start_data().is_populated();
+    bool has_cpu_warmstart =
+      solver_settings->get_pdlp_settings().get_cpu_pdlp_warm_start_data().is_populated();
+
+    bool has_gpu_warmstart = has_gpu_warmstart_view || has_gpu_warmstart_data;
+
+    if (has_gpu_warmstart || has_cpu_warmstart) {
+      if (target_is_gpu) {
+        // Target is GPU backend
+        if (has_gpu_warmstart_view) {
+          // GPU warmstart from Python → GPU backend: copy view (spans) to data (device_uvectors)
+          // Python sets the view (spans over cuDF), but solver needs device_uvectors
+          pdlp_warm_start_data_t<i_t, f_t> pdlp_warm_start_data(
+            solver_settings->get_pdlp_warm_start_data_view(), handle->get_stream());
+          solver_settings->get_pdlp_settings().set_pdlp_warm_start_data(pdlp_warm_start_data);
+        } else if (has_gpu_warmstart_data) {
+          // GPU warmstart from C++ API → GPU backend: data already set, nothing to do
+          // The device_uvectors are already populated in the settings
+        } else {
+          // CPU warmstart → GPU backend: convert H2D
+          pdlp_warm_start_data_t<i_t, f_t> gpu_warmstart = convert_to_gpu_warmstart(
+            solver_settings->get_pdlp_settings().get_cpu_pdlp_warm_start_data(),
+            handle->get_stream());
+          solver_settings->get_pdlp_settings().set_pdlp_warm_start_data(gpu_warmstart);
+        }
+      } else if (!has_cpu_warmstart) {
+        // Target is CPU backend (remote execution) with no CPU warmstart yet: convert GPU → CPU
+        // (D2H). A CUDA stream is still required, so rmm::cuda_stream_per_thread is used.
+        auto& pdlp_settings = solver_settings->get_pdlp_settings();
+        if (has_gpu_warmstart_data) {
+          pdlp_settings.get_cpu_pdlp_warm_start_data() = convert_to_cpu_warmstart(
+            pdlp_settings.get_pdlp_warm_start_data(), rmm::cuda_stream_per_thread);
+        } else {
+          // Only the Python view is populated: materialize it first, then convert
+          pdlp_warm_start_data_t<i_t, f_t> from_view(
+            solver_settings->get_pdlp_warm_start_data_view(), rmm::cuda_stream_per_thread);
+          pdlp_settings.get_cpu_pdlp_warm_start_data() =
+            convert_to_cpu_warmstart(from_view, rmm::cuda_stream_per_thread);
+        }
+      }
+    }
+  }
+
+  if (data_model->get_quadratic_objective_values().size() != 0 &&
+      data_model->get_quadratic_objective_indices().size() != 0 &&
+      data_model->get_quadratic_objective_offsets().size() != 0) {
+    problem->set_quadratic_objective_matrix(data_model->get_quadratic_objective_values().data(),
+                                            data_model->get_quadratic_objective_values().size(),
+                                            data_model->get_quadratic_objective_indices().data(),
+                                            data_model->get_quadratic_objective_indices().size(),
+                                            data_model->get_quadratic_objective_offsets().data(),
+                                            data_model->get_quadratic_objective_offsets().size());
+  }
+
+  if (data_model->get_variable_lower_bounds().size() != 0) {
+    problem->set_variable_lower_bounds(data_model->get_variable_lower_bounds().data(),
+                                       data_model->get_variable_lower_bounds().size());
+  }
+
+  if (data_model->get_variable_upper_bounds().size() != 0) {
+    problem->set_variable_upper_bounds(data_model->get_variable_upper_bounds().data(),
+                                       data_model->get_variable_upper_bounds().size());
+  }
+
+  if (data_model->get_row_types().size() != 0) {
+    problem->set_row_types(data_model->get_row_types().data(), data_model->get_row_types().size());
+  }
+
+  if (data_model->get_constraint_lower_bounds().size() != 0) {
+    problem->set_constraint_lower_bounds(data_model->get_constraint_lower_bounds().data(),
+                                         data_model->get_constraint_lower_bounds().size());
+  }
+
+  if (data_model->get_constraint_upper_bounds().size() != 0) {
+    problem->set_constraint_upper_bounds(data_model->get_constraint_upper_bounds().data(),
+                                         data_model->get_constraint_upper_bounds().size());
+  }
+
+  if (data_model->get_variable_types().size() != 0) {
+    std::vector<var_t> enum_variable_types(data_model->get_variable_types().size());
+    std::transform(
+      data_model->get_variable_types().data(),
+      data_model->get_variable_types().data() + data_model->get_variable_types().size(),
+      enum_variable_types.begin(),
+      [](const auto val) -> var_t {
+        return (val == 'I' || val == 'B') ? var_t::INTEGER : var_t::CONTINUOUS;
+      });
+    problem->set_variable_types(enum_variable_types.data(), enum_variable_types.size());
+    // Problem category (LP/MIP/IP) is auto-detected by set_variable_types
+  }
+
+  if (data_model->get_variable_names().size() != 0) {
+    problem->set_variable_names(data_model->get_variable_names());
+  }
+
+  if (data_model->get_row_names().size() != 0) {
+    problem->set_row_names(data_model->get_row_names());
+  }
+}
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp b/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp
index fb46879f6..363e41662 100644
--- a/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp
+++ b/cpp/include/cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp
@@ -70,6 +70,9 @@ struct pdlp_warm_start_data_t {
   pdlp_warm_start_data_t(const pdlp_warm_start_data_t<i_t, f_t>& other);
   pdlp_warm_start_data_t& operator=(pdlp_warm_start_data_t&& other) = default;
 
+  // Populated iff last_restart_duality_gap_dual_solution_ is non-empty (release/26.02 sentinel)
+  bool is_populated() const { return last_restart_duality_gap_dual_solution_.size() > 0; }
+
  private:
   // Check sizes through assertion
   void check_sizes();
diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
index 3b94fee14..01d18ea93 100644
--- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
@@ -8,6 +8,7 @@
 #pragma once
 
 #include <cuopt/linear_programming/constants.h>
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp>
 #include <optional>
@@ -178,6 +179,10 @@ class pdlp_solver_settings_t {
   const pdlp_warm_start_data_t<i_t, f_t>& get_pdlp_warm_start_data() const noexcept;
   pdlp_warm_start_data_t<i_t, f_t>& get_pdlp_warm_start_data();
   const pdlp_warm_start_data_view_t<i_t, f_t>& get_pdlp_warm_start_data_view() const noexcept;
+
+  // CPU warmstart data accessors (for remote execution)
+  const cpu_pdlp_warm_start_data_t<i_t, f_t>& get_cpu_pdlp_warm_start_data() const noexcept;
+  cpu_pdlp_warm_start_data_t<i_t, f_t>& get_cpu_pdlp_warm_start_data() noexcept;
   // TODO batch mode: tmp
   std::optional<f_t> get_initial_step_size() const;
   // TODO batch mode: tmp
@@ -252,10 +257,12 @@ class pdlp_solver_settings_t {
   /** Initial primal weight */
   // TODO batch mode: tmp
   std::optional<f_t> initial_primal_weight_;
-  // For the C++ interface
+  // For the C++ interface - GPU warmstart
   pdlp_warm_start_data_t<i_t, f_t> pdlp_warm_start_data_;
   // For the Cython interface
   pdlp_warm_start_data_view_t<i_t, f_t> pdlp_warm_start_data_view_;
+  // For remote execution - CPU warmstart
+  cpu_pdlp_warm_start_data_t<i_t, f_t> cpu_pdlp_warm_start_data_;
 
   friend class solver_settings_t<i_t, f_t>;
 };
diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp
index b30a3908f..8715cf3db 100644
--- a/cpp/include/cuopt/linear_programming/solve.hpp
+++ b/cpp/include/cuopt/linear_programming/solve.hpp
@@ -10,11 +10,14 @@
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/pdlp/solver_settings.hpp>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
 #include <cuopt/linear_programming/utilities/internals.hpp>
 
+#include <memory>
 #include <mps_parser/mps_data_model.hpp>
 #include <string>
 #include <vector>
@@ -144,4 +147,116 @@ optimization_problem_t<i_t, f_t> mps_data_model_to_optimization_problem(
   raft::handle_t const* handle_ptr,
   const cuopt::mps_parser::mps_data_model_t<i_t, f_t>& data_model);
 
+// ============================================================================
+// New overloads for optimization_problem_interface_t with remote execution
+// ============================================================================
+
+/**
+ * @brief Linear programming solve function for interface-based problems.
+ *
+ * This overload automatically handles remote execution:
+ * - If remote execution is enabled (via CUOPT_REMOTE_HOST/PORT), calls solve_lp_remote
+ * - Otherwise, converts to optimization_problem_t and calls the standard solve_lp
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] problem_interface  Interface to optimization problem (GPU or CPU backed)
+ * @param[in] settings  PDLP solver settings
+ * @param[in] problem_checking  If true, the problem is checked for consistency
+ * @param[in] use_pdlp_solver_mode  If true, use PDLP hyperparameters from solver mode
+ * @param[in] is_batch_mode  If true, batch solve mode is enabled
+ * @return std::unique_ptr<lp_solution_interface_t<i_t, f_t>> Polymorphic solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{},
+  bool problem_checking                            = true,
+  bool use_pdlp_solver_mode                        = true,
+  bool is_batch_mode                               = false);
+
+/**
+ * @brief Mixed integer programming solve function for interface-based problems.
+ *
+ * This overload automatically handles remote execution:
+ * - If remote execution is enabled (via CUOPT_REMOTE_HOST/PORT), calls solve_mip_remote
+ * - Otherwise, converts to optimization_problem_t and calls the standard solve_mip
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] problem_interface  Interface to optimization problem (GPU or CPU backed)
+ * @param[in] settings  MIP solver settings
+ * @return std::unique_ptr<mip_solution_interface_t<i_t, f_t>> Polymorphic solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  mip_solver_settings_t<i_t, f_t> const& settings = mip_solver_settings_t<i_t, f_t>{});
+
+// ============================================================================
+// Remote execution stubs (to be implemented later)
+// ============================================================================
+
+/**
+ * @brief Remote LP solver stub (placeholder implementation)
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] cpu_problem  CPU-backed optimization problem
+ * @param[in] settings  PDLP solver settings
+ * @return std::unique_ptr<lp_solution_interface_t<i_t, f_t>> Solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{});
+
+/**
+ * @brief Remote MIP solver stub (placeholder implementation)
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] cpu_problem  CPU-backed optimization problem
+ * @param[in] settings  MIP solver settings
+ * @return std::unique_ptr<mip_solution_interface_t<i_t, f_t>> Solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings = mip_solver_settings_t<i_t, f_t>{});
+
+/**
+ * @brief Remote LP solver for GPU problems
+ *
+ * Converts GPU problem to CPU format and calls CPU remote solver.
+ * Use case: Local GPU for problem manipulation, remote powerful GPU for solving.
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] gpu_problem  GPU-backed optimization problem
+ * @param[in] settings  PDLP solver settings
+ * @return std::unique_ptr<lp_solution_interface_t<i_t, f_t>> Solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings = pdlp_solver_settings_t<i_t, f_t>{});
+
+/**
+ * @brief Remote MIP solver for GPU problems
+ *
+ * Converts GPU problem to CPU format and calls CPU remote solver.
+ * Use case: Local GPU for problem manipulation, remote powerful GPU for solving.
+ *
+ * @tparam i_t Data type of indexes
+ * @tparam f_t Data type of the variables and their weights in the equations
+ * @param[in] gpu_problem  GPU-backed optimization problem
+ * @param[in] settings  MIP solver settings
+ * @return std::unique_ptr<mip_solution_interface_t<i_t, f_t>> Solution interface
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings = mip_solver_settings_t<i_t, f_t>{});
+
 }  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/solve_remote.hpp b/cpp/include/cuopt/linear_programming/solve_remote.hpp
new file mode 100644
index 000000000..9ca01e36a
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/solve_remote.hpp
@@ -0,0 +1,66 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+// Include the solution interface definitions so unique_ptr can properly delete them
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
+
+#include <memory>
+
+namespace cuopt::linear_programming {
+
+// Forward declarations for optimization problems (only declaration needed, not definition)
+template <typename i_t, typename f_t>
+class cpu_optimization_problem_t;
+
+template <typename i_t, typename f_t>
+class gpu_optimization_problem_t;
+// Class-key matches the definition (`class pdlp_solver_settings_t`) to avoid mismatched tags
+template <typename i_t, typename f_t>
+class pdlp_solver_settings_t;
+// NOTE(review): confirm mip_solver_settings_t's class-key against its definition as well
+template <typename i_t, typename f_t>
+struct mip_solver_settings_t;
+
+// ============================================================================
+// Remote Execution Functions
+// ============================================================================
+
+/**
+ * @brief Solve LP problem remotely (CPU backend)
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings);
+
+/**
+ * @brief Solve LP problem remotely (GPU backend)
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings);
+
+/**
+ * @brief Solve MIP problem remotely (CPU backend)
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings);
+
+/**
+ * @brief Solve MIP problem remotely (GPU backend)
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings);
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
index e1a75747d..33aa5bcd6 100644
--- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
+++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp
@@ -1,97 +1,55 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
 
 #pragma once
 
-#include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
-#include <cuopt/linear_programming/pdlp/solver_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
-#include <cuopt/linear_programming/utilities/internals.hpp>
+#include <cuopt/linear_programming/utilities/cython_types.hpp>
+
 #include <memory>
 #include <mps_parser/data_model_view.hpp>
 #include <raft/core/handle.hpp>
 #include <string>
 #include <utility>
+#include <variant>
 #include <vector>
 
 namespace cuopt {
 namespace cython {
 
-// aggregate for call_solve() return type
-// to be exposed to cython:
-struct linear_programming_ret_t {
-  std::unique_ptr<rmm::device_buffer> primal_solution_;
-  std::unique_ptr<rmm::device_buffer> dual_solution_;
-  std::unique_ptr<rmm::device_buffer> reduced_cost_;
-  /* -- PDLP Warm Start Data -- */
-  std::unique_ptr<rmm::device_buffer> current_primal_solution_;
-  std::unique_ptr<rmm::device_buffer> current_dual_solution_;
-  std::unique_ptr<rmm::device_buffer> initial_primal_average_;
-  std::unique_ptr<rmm::device_buffer> initial_dual_average_;
-  std::unique_ptr<rmm::device_buffer> current_ATY_;
-  std::unique_ptr<rmm::device_buffer> sum_primal_solutions_;
-  std::unique_ptr<rmm::device_buffer> sum_dual_solutions_;
-  std::unique_ptr<rmm::device_buffer> last_restart_duality_gap_primal_solution_;
-  std::unique_ptr<rmm::device_buffer> last_restart_duality_gap_dual_solution_;
-  double initial_primal_weight_;
-  double initial_step_size_;
-  int total_pdlp_iterations_;
-  int total_pdhg_iterations_;
-  double last_candidate_kkt_score_;
-  double last_restart_kkt_score_;
-  double sum_solution_weight_;
-  int iterations_since_last_restart_;
-  /* -- /PDLP Warm Start Data -- */
-
-  linear_programming::pdlp_termination_status_t termination_status_;
-  error_type_t error_status_;
-  std::string error_message_;
-
-  /*Termination stats*/
-  double l2_primal_residual_;
-  double l2_dual_residual_;
-  double primal_objective_;
-  double dual_objective_;
-  double gap_;
-  int nb_iterations_;
-  double solve_time_;
-  bool solved_by_pdlp_;
-};
-
-struct mip_ret_t {
-  std::unique_ptr<rmm::device_buffer> solution_;
-
-  linear_programming::mip_termination_status_t termination_status_;
-  error_type_t error_status_;
-  std::string error_message_;
-
-  /*Termination stats*/
-  double objective_;
-  double mip_gap_;
-  double solution_bound_;
-  double total_solve_time_;
-  double presolve_time_;
-  double max_constraint_violation_;
-  double max_int_violation_;
-  double max_variable_bound_violation_;
-  int nodes_;
-  int simplex_iterations_;
-};
+// Type definitions moved to cython_types.hpp to avoid circular dependencies
+// The types linear_programming_ret_t, cpu_linear_programming_ret_t, mip_ret_t, cpu_mip_ret_t
+// are now defined in cython_types.hpp
 
+// Aggregate for call_solve() return type
+// Uses std::variant to hold either GPU or CPU solution structs
 struct solver_ret_t {
   linear_programming::problem_category_t problem_type;
-  linear_programming_ret_t lp_ret;
-  mip_ret_t mip_ret;
-  // possibly add dual simplex return structure
+  std::variant<linear_programming_ret_t, cpu_linear_programming_ret_t> lp_ret;
+  std::variant<mip_ret_t, cpu_mip_ret_t> mip_ret;
 };
 
-// Wrapper for solve to expose the API to cython.
+// Wrapper functions to expose the API to cython.
+
+// Call solve_lp and return solution interface pointer
+linear_programming::lp_solution_interface_t<int, double>* call_solve_lp(
+  linear_programming::optimization_problem_interface_t<int, double>* problem_interface,
+  linear_programming::pdlp_solver_settings_t<int, double>& solver_settings,
+  bool is_batch_mode = false);
+
+// Call solve_mip and return solution interface pointer
+linear_programming::mip_solution_interface_t<int, double>* call_solve_mip(
+  linear_programming::optimization_problem_interface_t<int, double>* problem_interface,
+  linear_programming::mip_solver_settings_t<int, double>& solver_settings);
 
+// Main solve entry point from Python
 std::unique_ptr<solver_ret_t> call_solve(cuopt::mps_parser::data_model_view_t<int, double>*,
                                          linear_programming::solver_settings_t<int, double>*,
                                          unsigned int flags = cudaStreamNonBlocking,
diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp
new file mode 100644
index 000000000..44168d0a3
--- /dev/null
+++ b/cpp/include/cuopt/linear_programming/utilities/cython_types.hpp
@@ -0,0 +1,146 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#pragma once
+
+#include <cuopt/linear_programming/mip/solver_solution.hpp>
+#include <cuopt/linear_programming/pdlp/solver_solution.hpp>
+#include <cuopt/linear_programming/utilities/internals.hpp>
+
+#include <rmm/device_buffer.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace cuopt {
+namespace cython {
+
+// GPU-backed LP solution struct (uses device memory).
+// All scalar, enum and bool members are value-initialized so that a result
+// which is only partially filled in never exposes indeterminate values.
+struct linear_programming_ret_t {
+  std::unique_ptr<rmm::device_buffer> primal_solution_;
+  std::unique_ptr<rmm::device_buffer> dual_solution_;
+  std::unique_ptr<rmm::device_buffer> reduced_cost_;
+  /* -- PDLP Warm Start Data -- */
+  std::unique_ptr<rmm::device_buffer> current_primal_solution_;
+  std::unique_ptr<rmm::device_buffer> current_dual_solution_;
+  std::unique_ptr<rmm::device_buffer> initial_primal_average_;
+  std::unique_ptr<rmm::device_buffer> initial_dual_average_;
+  std::unique_ptr<rmm::device_buffer> current_ATY_;
+  std::unique_ptr<rmm::device_buffer> sum_primal_solutions_;
+  std::unique_ptr<rmm::device_buffer> sum_dual_solutions_;
+  std::unique_ptr<rmm::device_buffer> last_restart_duality_gap_primal_solution_;
+  std::unique_ptr<rmm::device_buffer> last_restart_duality_gap_dual_solution_;
+  double initial_primal_weight_{};
+  double initial_step_size_{};
+  int total_pdlp_iterations_{};
+  int total_pdhg_iterations_{};
+  double last_candidate_kkt_score_{};
+  double last_restart_kkt_score_{};
+  double sum_solution_weight_{};
+  int iterations_since_last_restart_{};
+  /* -- /PDLP Warm Start Data -- */
+
+  linear_programming::pdlp_termination_status_t termination_status_{};
+  error_type_t error_status_{};
+  std::string error_message_;
+
+  /*Termination stats*/
+  double l2_primal_residual_{};
+  double l2_dual_residual_{};
+  double primal_objective_{};
+  double dual_objective_{};
+  double gap_{};
+  int nb_iterations_{};
+  double solve_time_{};
+  bool solved_by_pdlp_{};
+};
+
+// CPU-backed LP solution struct (uses host memory).
+// Field-for-field counterpart of linear_programming_ret_t with std::vector<double>
+// in place of device buffers. All scalar, enum and bool members are
+// value-initialized so a partially filled result never exposes indeterminate values.
+struct cpu_linear_programming_ret_t {
+  std::vector<double> primal_solution_;
+  std::vector<double> dual_solution_;
+  std::vector<double> reduced_cost_;
+  /* -- PDLP Warm Start Data -- */
+  std::vector<double> current_primal_solution_;
+  std::vector<double> current_dual_solution_;
+  std::vector<double> initial_primal_average_;
+  std::vector<double> initial_dual_average_;
+  std::vector<double> current_ATY_;
+  std::vector<double> sum_primal_solutions_;
+  std::vector<double> sum_dual_solutions_;
+  std::vector<double> last_restart_duality_gap_primal_solution_;
+  std::vector<double> last_restart_duality_gap_dual_solution_;
+  double initial_primal_weight_{};
+  double initial_step_size_{};
+  int total_pdlp_iterations_{};
+  int total_pdhg_iterations_{};
+  double last_candidate_kkt_score_{};
+  double last_restart_kkt_score_{};
+  double sum_solution_weight_{};
+  int iterations_since_last_restart_{};
+  /* -- /PDLP Warm Start Data -- */
+
+  linear_programming::pdlp_termination_status_t termination_status_{};
+  error_type_t error_status_{};
+  std::string error_message_;
+
+  /*Termination stats*/
+  double l2_primal_residual_{};
+  double l2_dual_residual_{};
+  double primal_objective_{};
+  double dual_objective_{};
+  double gap_{};
+  int nb_iterations_{};
+  double solve_time_{};
+  bool solved_by_pdlp_{};
+};
+
+// GPU-backed MIP solution struct (uses device memory).
+// All scalar and enum members are value-initialized so a partially filled
+// result never exposes indeterminate values.
+struct mip_ret_t {
+  std::unique_ptr<rmm::device_buffer> solution_;
+
+  linear_programming::mip_termination_status_t termination_status_{};
+  error_type_t error_status_{};
+  std::string error_message_;
+
+  /*Termination stats*/
+  double objective_{};
+  double mip_gap_{};
+  double solution_bound_{};
+  double total_solve_time_{};
+  double presolve_time_{};
+  double max_constraint_violation_{};
+  double max_int_violation_{};
+  double max_variable_bound_violation_{};
+  int nodes_{};
+  int simplex_iterations_{};
+};
+
+// CPU-backed MIP solution struct (uses host memory).
+// Field-for-field counterpart of mip_ret_t with std::vector<double> in place of
+// the device buffer. All scalar and enum members are value-initialized so a
+// partially filled result never exposes indeterminate values.
+struct cpu_mip_ret_t {
+  std::vector<double> solution_;
+
+  linear_programming::mip_termination_status_t termination_status_{};
+  error_type_t error_status_{};
+  std::string error_message_;
+
+  /*Termination stats*/
+  double objective_{};
+  double mip_gap_{};
+  double solution_bound_{};
+  double total_solve_time_{};
+  double presolve_time_{};
+  double max_constraint_violation_{};
+  double max_int_violation_{};
+  double max_variable_bound_violation_{};
+  int nodes_{};
+  int simplex_iterations_{};
+};
+
+}  // namespace cython
+}  // namespace cuopt
diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt
index ced9da8ed..89f6b0379 100644
--- a/cpp/src/linear_programming/CMakeLists.txt
+++ b/cpp/src/linear_programming/CMakeLists.txt
@@ -7,14 +7,20 @@
 set(LP_CORE_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/gpu_optimization_problem.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/cpu_optimization_problem.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/backend_selection.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/solve_remote.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/pdhg.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/solver_solution.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/solution_conversion.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/saddle_point.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/cusparse_view.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/pdlp_warm_start_data.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/cpu_pdlp_warm_start_data.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/initial_scaling_strategy/initial_scaling.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/step_size_strategy/adaptive_step_size_strategy.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/restart_strategy/pdlp_restart_strategy.cu
diff --git a/cpp/src/linear_programming/backend_selection.cpp b/cpp/src/linear_programming/backend_selection.cpp
new file mode 100644
index 000000000..e17bf2e8b
--- /dev/null
+++ b/cpp/src/linear_programming/backend_selection.cpp
@@ -0,0 +1,65 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <utilities/logger.hpp>
+
+#include <algorithm>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+namespace cuopt::linear_programming {
+
+// Remote execution counts as enabled only when both CUOPT_REMOTE_HOST and
+// CUOPT_REMOTE_PORT exist in the environment; their values are not inspected.
+bool is_remote_execution_enabled()
+{
+  const bool has_host = std::getenv("CUOPT_REMOTE_HOST") != nullptr;
+  const bool has_port = std::getenv("CUOPT_REMOTE_PORT") != nullptr;
+  if (!has_host) { return false; }
+  return has_port;
+}
+
+// Maps the remote-execution environment check onto the execution_mode_t enum:
+// REMOTE when is_remote_execution_enabled() is true, LOCAL otherwise.
+execution_mode_t get_execution_mode()
+{
+  if (is_remote_execution_enabled()) { return execution_mode_t::REMOTE; }
+  return execution_mode_t::LOCAL;
+}
+
+namespace {
+
+// Returns true when the environment variable `name` is set to "true"
+// (compared case-insensitively) or to "1". An unset variable or any other
+// value yields false.
+bool env_flag_is_true(const char* name)
+{
+  const char* raw = std::getenv(name);
+  if (raw == nullptr) { return false; }
+
+  std::string value(raw);
+  // std::tolower is undefined for negative char values, so each character is
+  // passed through unsigned char before the call.
+  std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) {
+    return static_cast<char>(std::tolower(c));
+  });
+  return value == "true" || value == "1";
+}
+
+}  // namespace
+
+// True when CUOPT_USE_GPU_MEM_FOR_REMOTE is "true" (any case) or "1".
+bool use_gpu_memory_for_remote() { return env_flag_is_true("CUOPT_USE_GPU_MEM_FOR_REMOTE"); }
+
+// True when CUOPT_USE_CPU_MEM_FOR_LOCAL is "true" (any case) or "1".
+bool use_cpu_memory_for_local() { return env_flag_is_true("CUOPT_USE_CPU_MEM_FOR_LOCAL"); }
+
+// Chooses the memory backend from the execution mode plus the override flags:
+//   - remote execution: CPU unless use_gpu_memory_for_remote() is true
+//   - local execution:  GPU unless use_cpu_memory_for_local() is true
+memory_backend_t get_memory_backend_type()
+{
+  const bool running_remotely = (get_execution_mode() != execution_mode_t::LOCAL);
+  if (running_remotely) {
+    if (use_gpu_memory_for_remote()) { return memory_backend_t::GPU; }
+    return memory_backend_t::CPU;
+  }
+
+  if (use_cpu_memory_for_local()) { return memory_backend_t::CPU; }
+  return memory_backend_t::GPU;
+}
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/cpu_optimization_problem.cpp b/cpp/src/linear_programming/cpu_optimization_problem.cpp
new file mode 100644
index 000000000..36144ef79
--- /dev/null
+++ b/cpp/src/linear_programming/cpu_optimization_problem.cpp
@@ -0,0 +1,877 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/solve_remote.hpp>
+
+#include <mip/mip_constants.hpp>
+#include <mps_parser/writer.hpp>
+#include <utilities/logger.hpp>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <stdexcept>
+#include <unordered_map>
+
+namespace cuopt::linear_programming {
+
+// ==============================================================================
+// Constructor
+// ==============================================================================
+
+// Stores the (possibly null) raft handle and announces the backend on stderr.
+// NOTE(review): the stderr line is emitted on every construction; confirm
+// whether it is meant to stay or to go through the project logger.
+template <typename i_t, typename f_t>
+cpu_optimization_problem_t<i_t, f_t>::cpu_optimization_problem_t(raft::handle_t const* handle_ptr)
+  : handle_ptr_(handle_ptr)
+{
+  std::fputs("cpu_optimization_problem_t constructor: Using CPU backend\n", stderr);
+}
+
+// ==============================================================================
+// Setters
+// ==============================================================================
+
+// Stores the optimization-sense flag; get_sense() returns this value.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_maximize(bool maximize)
+{
+  maximize_ = maximize;
+}
+
+// Copies the CSR constraint matrix (values, column indices, row offsets) into
+// host vectors and derives the constraint count from the offsets array.
+//
+// A CSR offsets array has one more entry than the matrix has rows, so the
+// constraint count is size_offsets - 1. An empty offsets array is treated as
+// zero constraints instead of producing a count of -1.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_csr_constraint_matrix(const f_t* A_values,
+                                                                     i_t size_values,
+                                                                     const i_t* A_indices,
+                                                                     i_t size_indices,
+                                                                     const i_t* A_offsets,
+                                                                     i_t size_offsets)
+{
+  n_constraints_ = (size_offsets > 0) ? size_offsets - 1 : 0;
+
+  A_.resize(size_values);
+  A_indices_.resize(size_indices);
+  A_offsets_.resize(size_offsets);
+
+  std::copy(A_values, A_values + size_values, A_.begin());
+  std::copy(A_indices, A_indices + size_indices, A_indices_.begin());
+  std::copy(A_offsets, A_offsets + size_offsets, A_offsets_.begin());
+}
+
+// Replaces the stored constraint bounds with a copy of the `size` values at `b`.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_constraint_bounds(const f_t* b, i_t size)
+{
+  b_.resize(size);
+  std::copy_n(b, size, b_.begin());
+}
+
+// Replaces the stored objective coefficients with a copy of the `size` values
+// at `c`, and records `size` as the number of variables.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_objective_coefficients(const f_t* c, i_t size)
+{
+  n_vars_ = size;
+
+  c_.resize(size);
+  std::copy_n(c, size, c_.begin());
+}
+
+// Stores the objective scaling factor; returned by get_objective_scaling_factor().
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_objective_scaling_factor(
+  f_t objective_scaling_factor)
+{
+  objective_scaling_factor_ = objective_scaling_factor;
+}
+
+// Stores the objective offset; returned by get_objective_offset().
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_objective_offset(f_t objective_offset)
+{
+  objective_offset_ = objective_offset;
+}
+
+// Copies the quadratic objective matrix (CSR values, indices, offsets) into
+// host vectors. The validate_positive_semi_definite argument is accepted but
+// not consulted by this implementation.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
+  const f_t* Q_values,
+  i_t size_values,
+  const i_t* Q_indices,
+  i_t size_indices,
+  const i_t* Q_offsets,
+  i_t size_offsets,
+  bool validate_positive_semi_definite)
+{
+  // Size all three destinations first, then fill them.
+  Q_values_.resize(size_values);
+  Q_indices_.resize(size_indices);
+  Q_offsets_.resize(size_offsets);
+
+  std::copy_n(Q_values, size_values, Q_values_.begin());
+  std::copy_n(Q_indices, size_indices, Q_indices_.begin());
+  std::copy_n(Q_offsets, size_offsets, Q_offsets_.begin());
+}
+
+// Replaces the stored variable lower bounds with a copy of `size` input values.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_variable_lower_bounds(
+  const f_t* variable_lower_bounds, i_t size)
+{
+  auto& dst = variable_lower_bounds_;
+  dst.resize(size);
+  std::copy_n(variable_lower_bounds, size, dst.begin());
+}
+
+// Replaces the stored variable upper bounds with a copy of `size` input values.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_variable_upper_bounds(
+  const f_t* variable_upper_bounds, i_t size)
+{
+  auto& dst = variable_upper_bounds_;
+  dst.resize(size);
+  std::copy_n(variable_upper_bounds, size, dst.begin());
+}
+
+// Copies the per-variable types and updates the problem category from them:
+//   - every variable is INTEGER (and there is at least one) -> IP
+//   - some but not all variables are INTEGER                -> MIP
+//   - no INTEGER variables                                  -> category left unchanged
+// An empty type array leaves the category untouched; without the size check,
+// 0 == 0 would classify a problem with no variables as IP.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_variable_types(const var_t* variable_types, i_t size)
+{
+  variable_types_.resize(size);
+  std::copy(variable_types, variable_types + size, variable_types_.begin());
+
+  // Auto-detect problem category based on variable types (matching original optimization_problem_t)
+  i_t n_integer = std::count_if(
+    variable_types_.begin(), variable_types_.end(), [](auto val) { return val == var_t::INTEGER; });
+  // By default it is LP
+  if (size > 0 && n_integer == size) {
+    problem_category_ = problem_category_t::IP;
+  } else if (n_integer > 0) {
+    problem_category_ = problem_category_t::MIP;
+  }
+}
+
+// Overrides the problem category directly. A later set_variable_types() call
+// may change it again, since that setter re-detects IP/MIP from the types.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_problem_category(const problem_category_t& category)
+{
+  problem_category_ = category;
+}
+
+// Replaces the stored constraint lower bounds with a copy of `size` input values.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_constraint_lower_bounds(
+  const f_t* constraint_lower_bounds, i_t size)
+{
+  auto& dst = constraint_lower_bounds_;
+  dst.resize(size);
+  std::copy_n(constraint_lower_bounds, size, dst.begin());
+}
+
+// Replaces the stored constraint upper bounds with a copy of `size` input values.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_constraint_upper_bounds(
+  const f_t* constraint_upper_bounds, i_t size)
+{
+  auto& dst = constraint_upper_bounds_;
+  dst.resize(size);
+  std::copy_n(constraint_upper_bounds, size, dst.begin());
+}
+
+// Replaces the stored per-row type characters with a copy of `size` input values.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_row_types(const char* row_types, i_t size)
+{
+  row_types_.resize(size);
+  std::copy_n(row_types, size, row_types_.begin());
+}
+
+// Stores a copy of the objective name.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_objective_name(const std::string& objective_name)
+{
+  objective_name_ = objective_name;
+}
+
+// Stores a copy of the problem name.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_problem_name(const std::string& problem_name)
+{
+  problem_name_ = problem_name;
+}
+
+// Stores a copy of the variable names; no length check against n_vars_ is made here.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_variable_names(
+  const std::vector<std::string>& variable_names)
+{
+  var_names_ = variable_names;
+}
+
+// Stores a copy of the row names; no length check against n_constraints_ is made here.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::set_row_names(const std::vector<std::string>& row_names)
+{
+  row_names_ = row_names;
+}
+
+// ==============================================================================
+// Device Getters - Throw exceptions (not supported for CPU implementation)
+// ==============================================================================
+
+namespace {
+// Shared failure path for every device-memory accessor of the CPU backend.
+// Always throws std::runtime_error; the message names the offending method and
+// points the caller at the matching _host() accessor.
+[[noreturn]] void throw_gpu_not_supported(const char* method_name)
+{
+  std::string message("cpu_optimization_problem_t::");
+  message += method_name;
+  message +=
+    "(): GPU memory access is not supported in CPU implementation. "
+    "Use the corresponding _host() method instead.";
+  throw std::runtime_error(message);
+}
+}  // namespace
+
+// Number of variables, as recorded by set_objective_coefficients().
+template <typename i_t, typename f_t>
+i_t cpu_optimization_problem_t<i_t, f_t>::get_n_variables() const
+{
+  return n_vars_;
+}
+
+// Number of constraints, as derived from the offsets array in
+// set_csr_constraint_matrix().
+template <typename i_t, typename f_t>
+i_t cpu_optimization_problem_t<i_t, f_t>::get_n_constraints() const
+{
+  return n_constraints_;
+}
+
+// Number of stored constraint-matrix values (size of A_), converted to i_t.
+template <typename i_t, typename f_t>
+i_t cpu_optimization_problem_t<i_t, f_t>::get_nnz() const
+{
+  return A_.size();
+}
+
+// Counts the variables whose stored type is var_t::INTEGER.
+template <typename i_t, typename f_t>
+i_t cpu_optimization_problem_t<i_t, f_t>::get_n_integers() const
+{
+  const auto n_integer =
+    std::count(variable_types_.begin(), variable_types_.end(), var_t::INTEGER);
+  return static_cast<i_t>(n_integer);
+}
+
+// The accessors below return device vectors in the interface. The CPU backend
+// holds no device memory, so each one unconditionally throws
+// std::runtime_error through throw_gpu_not_supported().
+
+// Always throws: device constraint-matrix values are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
+  const
+{
+  throw_gpu_not_supported("get_constraint_matrix_values");
+}
+
+// Always throws: device constraint-matrix values are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
+{
+  throw_gpu_not_supported("get_constraint_matrix_values");
+}
+
+// Always throws: device constraint-matrix indices are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<i_t>&
+cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices() const
+{
+  throw_gpu_not_supported("get_constraint_matrix_indices");
+}
+
+// Always throws: device constraint-matrix indices are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<i_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices()
+{
+  throw_gpu_not_supported("get_constraint_matrix_indices");
+}
+
+// Always throws: device constraint-matrix offsets are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<i_t>&
+cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets() const
+{
+  throw_gpu_not_supported("get_constraint_matrix_offsets");
+}
+
+// Always throws: device constraint-matrix offsets are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<i_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets()
+{
+  throw_gpu_not_supported("get_constraint_matrix_offsets");
+}
+
+// Always throws: device constraint bounds are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds() const
+{
+  throw_gpu_not_supported("get_constraint_bounds");
+}
+
+// Always throws: device constraint bounds are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds()
+{
+  throw_gpu_not_supported("get_constraint_bounds");
+}
+
+// Always throws: device objective coefficients are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
+  const
+{
+  throw_gpu_not_supported("get_objective_coefficients");
+}
+
+// Always throws: device objective coefficients are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
+{
+  throw_gpu_not_supported("get_objective_coefficients");
+}
+
+// Returns the value stored by set_objective_scaling_factor().
+template <typename i_t, typename f_t>
+f_t cpu_optimization_problem_t<i_t, f_t>::get_objective_scaling_factor() const
+{
+  return objective_scaling_factor_;
+}
+
+// Returns the value stored by set_objective_offset().
+template <typename i_t, typename f_t>
+f_t cpu_optimization_problem_t<i_t, f_t>::get_objective_offset() const
+{
+  return objective_offset_;
+}
+
+// The accessors below return device vectors in the interface. The CPU backend
+// holds no device memory, so each one unconditionally throws
+// std::runtime_error through throw_gpu_not_supported().
+
+// Always throws: device variable lower bounds are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
+  const
+{
+  throw_gpu_not_supported("get_variable_lower_bounds");
+}
+
+// Always throws: device variable lower bounds are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
+{
+  throw_gpu_not_supported("get_variable_lower_bounds");
+}
+
+// Always throws: device variable upper bounds are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
+  const
+{
+  throw_gpu_not_supported("get_variable_upper_bounds");
+}
+
+// Always throws: device variable upper bounds are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
+{
+  throw_gpu_not_supported("get_variable_upper_bounds");
+}
+
+// Always throws: device constraint lower bounds are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
+  const
+{
+  throw_gpu_not_supported("get_constraint_lower_bounds");
+}
+
+// Always throws: device constraint lower bounds are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
+{
+  throw_gpu_not_supported("get_constraint_lower_bounds");
+}
+
+// Always throws: device constraint upper bounds are unavailable (const overload).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
+  const
+{
+  throw_gpu_not_supported("get_constraint_upper_bounds");
+}
+
+// Always throws: device constraint upper bounds are unavailable (mutable overload).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
+{
+  throw_gpu_not_supported("get_constraint_upper_bounds");
+}
+
+// Always throws: device row types are unavailable.
+template <typename i_t, typename f_t>
+const rmm::device_uvector<char>& cpu_optimization_problem_t<i_t, f_t>::get_row_types() const
+{
+  throw_gpu_not_supported("get_row_types");
+}
+
+// Always throws: device variable types are unavailable.
+template <typename i_t, typename f_t>
+const rmm::device_uvector<var_t>& cpu_optimization_problem_t<i_t, f_t>::get_variable_types() const
+{
+  throw_gpu_not_supported("get_variable_types");
+}
+
+// Returns the flag stored by set_maximize() (true means maximize).
+template <typename i_t, typename f_t>
+bool cpu_optimization_problem_t<i_t, f_t>::get_sense() const
+{
+  return maximize_;
+}
+
+// True when the problem has no variables or no constraints. Note that a
+// problem with variables but zero constraints is also reported as empty.
+template <typename i_t, typename f_t>
+bool cpu_optimization_problem_t<i_t, f_t>::empty() const
+{
+  return n_vars_ == 0 || n_constraints_ == 0;
+}
+
+// Returns a copy of the objective name.
+template <typename i_t, typename f_t>
+std::string cpu_optimization_problem_t<i_t, f_t>::get_objective_name() const
+{
+  return objective_name_;
+}
+
+// Returns a copy of the problem name.
+template <typename i_t, typename f_t>
+std::string cpu_optimization_problem_t<i_t, f_t>::get_problem_name() const
+{
+  return problem_name_;
+}
+
+// Returns the current problem category (set explicitly or detected by
+// set_variable_types()).
+template <typename i_t, typename f_t>
+problem_category_t cpu_optimization_problem_t<i_t, f_t>::get_problem_category() const
+{
+  return problem_category_;
+}
+
+// Returns a reference to the stored variable names; valid while this object lives.
+template <typename i_t, typename f_t>
+const std::vector<std::string>& cpu_optimization_problem_t<i_t, f_t>::get_variable_names() const
+{
+  return var_names_;
+}
+
+// Returns a reference to the stored row names; valid while this object lives.
+template <typename i_t, typename f_t>
+const std::vector<std::string>& cpu_optimization_problem_t<i_t, f_t>::get_row_names() const
+{
+  return row_names_;
+}
+
+// Returns a reference to the quadratic objective's CSR offsets.
+template <typename i_t, typename f_t>
+const std::vector<i_t>& cpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_offsets()
+  const
+{
+  return Q_offsets_;
+}
+
+// Returns a reference to the quadratic objective's CSR indices.
+template <typename i_t, typename f_t>
+const std::vector<i_t>& cpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_indices()
+  const
+{
+  return Q_indices_;
+}
+
+// Returns a reference to the quadratic objective's CSR values.
+template <typename i_t, typename f_t>
+const std::vector<f_t>& cpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_values() const
+{
+  return Q_values_;
+}
+
+// True when at least one quadratic objective value has been stored.
+template <typename i_t, typename f_t>
+bool cpu_optimization_problem_t<i_t, f_t>::has_quadratic_objective() const
+{
+  return !Q_values_.empty();
+}
+
+// ==============================================================================
+// Host Getters (return copies of the vectors held in CPU memory)
+// ==============================================================================
+
+// Each *_host() accessor below returns its vector by value, so the caller
+// receives an independent copy of the stored data.
+
+// Copy of the CSR constraint-matrix values.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values_host() const
+{
+  return A_;
+}
+
+// Copy of the CSR constraint-matrix column indices.
+template <typename i_t, typename f_t>
+std::vector<i_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices_host() const
+{
+  return A_indices_;
+}
+
+// Copy of the CSR constraint-matrix row offsets.
+template <typename i_t, typename f_t>
+std::vector<i_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets_host() const
+{
+  return A_offsets_;
+}
+
+// Copy of the constraint bounds stored by set_constraint_bounds().
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds_host() const
+{
+  return b_;
+}
+
+// Copy of the objective coefficients.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients_host() const
+{
+  return c_;
+}
+
+// Copy of the variable lower bounds.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds_host() const
+{
+  return variable_lower_bounds_;
+}
+
+// Copy of the variable upper bounds.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds_host() const
+{
+  return variable_upper_bounds_;
+}
+
+// Copy of the constraint lower bounds.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds_host() const
+{
+  return constraint_lower_bounds_;
+}
+
+// Copy of the constraint upper bounds.
+template <typename i_t, typename f_t>
+std::vector<f_t> cpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds_host() const
+{
+  return constraint_upper_bounds_;
+}
+
+// Copy of the per-row type characters.
+template <typename i_t, typename f_t>
+std::vector<char> cpu_optimization_problem_t<i_t, f_t>::get_row_types_host() const
+{
+  return row_types_;
+}
+
+// Copy of the per-variable types.
+template <typename i_t, typename f_t>
+std::vector<var_t> cpu_optimization_problem_t<i_t, f_t>::get_variable_types_host() const
+{
+  return variable_types_;
+}
+
+// ==============================================================================
+// Conversion to optimization_problem_t
+// ==============================================================================
+
+// Builds an optimization_problem_t from the data held on the host by replaying
+// every non-empty field through the target's setters.
+//
+// Throws std::runtime_error when this object was created without a raft handle,
+// because the target problem is constructed from that handle.
+//
+// The problem category is applied last. This class's set_variable_types()
+// re-detects the category and is documented as matching optimization_problem_t,
+// so setting the category before the variable types could let that detection
+// overwrite a category chosen explicitly on this object.
+template <typename i_t, typename f_t>
+optimization_problem_t<i_t, f_t> cpu_optimization_problem_t<i_t, f_t>::to_optimization_problem()
+{
+  if (handle_ptr_ == nullptr) {
+    // NOTE: We could theoretically allocate GPU resources here, but we are not
+    // currently supporting local solve of a problem that has been built on the CPU.
+    // CPU problems are intended for remote execution only.
+    throw std::runtime_error(
+      "cpu_optimization_problem_t::to_optimization_problem(): "
+      "handle_ptr is null. Cannot convert to GPU-backed optimization_problem_t without CUDA "
+      "resources. "
+      "CPU problems are intended for remote execution only. "
+      "For local solving, create the problem with GPU backend from the start.");
+  }
+
+  optimization_problem_t<i_t, f_t> problem(handle_ptr_);
+
+  // Set scalar values (the category is deliberately deferred to the end)
+  problem.set_maximize(maximize_);
+  problem.set_objective_scaling_factor(objective_scaling_factor_);
+  problem.set_objective_offset(objective_offset_);
+
+  // Set string values
+  if (!objective_name_.empty()) problem.set_objective_name(objective_name_);
+  if (!problem_name_.empty()) problem.set_problem_name(problem_name_);
+  if (!var_names_.empty()) problem.set_variable_names(var_names_);
+  if (!row_names_.empty()) problem.set_row_names(row_names_);
+
+  // Set CSR constraint matrix (data will be copied to GPU by optimization_problem_t setters)
+  if (!A_.empty()) {
+    problem.set_csr_constraint_matrix(A_.data(),
+                                      A_.size(),
+                                      A_indices_.data(),
+                                      A_indices_.size(),
+                                      A_offsets_.data(),
+                                      A_offsets_.size());
+  }
+
+  // Set constraint bounds
+  if (!b_.empty()) { problem.set_constraint_bounds(b_.data(), b_.size()); }
+
+  // Set objective coefficients
+  if (!c_.empty()) { problem.set_objective_coefficients(c_.data(), c_.size()); }
+
+  // Set quadratic objective if present
+  if (!Q_values_.empty()) {
+    problem.set_quadratic_objective_matrix(Q_values_.data(),
+                                           Q_values_.size(),
+                                           Q_indices_.data(),
+                                           Q_indices_.size(),
+                                           Q_offsets_.data(),
+                                           Q_offsets_.size());
+  }
+
+  // Set variable bounds
+  if (!variable_lower_bounds_.empty()) {
+    problem.set_variable_lower_bounds(variable_lower_bounds_.data(), variable_lower_bounds_.size());
+  }
+  if (!variable_upper_bounds_.empty()) {
+    problem.set_variable_upper_bounds(variable_upper_bounds_.data(), variable_upper_bounds_.size());
+  }
+
+  // Set variable types
+  if (!variable_types_.empty()) {
+    problem.set_variable_types(variable_types_.data(), variable_types_.size());
+  }
+
+  // Set constraint lower/upper bounds
+  if (!constraint_lower_bounds_.empty()) {
+    problem.set_constraint_lower_bounds(constraint_lower_bounds_.data(),
+                                        constraint_lower_bounds_.size());
+  }
+  if (!constraint_upper_bounds_.empty()) {
+    problem.set_constraint_upper_bounds(constraint_upper_bounds_.data(),
+                                        constraint_upper_bounds_.size());
+  }
+
+  // Set row types
+  if (!row_types_.empty()) { problem.set_row_types(row_types_.data(), row_types_.size()); }
+
+  // Apply the category after every other setter so the converted problem
+  // carries exactly the category stored on this object.
+  problem.set_problem_category(problem_category_);
+
+  return problem;
+}
+
+// ==============================================================================
+// File I/O
+// ==============================================================================
+
+// Writes the problem to `mps_file_path` by filling a data_model_view_t with
+// pointers into this object's host vectors and handing it to write_mps().
+//
+// Variable lower and upper bounds are forwarded independently, so upper bounds
+// are still written when no lower bounds were set (previously they were only
+// forwarded inside the lower-bounds check). This matches how
+// to_optimization_problem() treats the two arrays.
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_path)
+{
+  // Data is already in host memory, so we can directly create a view and write
+  cuopt::mps_parser::data_model_view_t<i_t, f_t> data_model_view;
+
+  // Set optimization sense
+  data_model_view.set_maximize(maximize_);
+
+  // Set constraint matrix in CSR format
+  if (!A_.empty()) {
+    data_model_view.set_csr_constraint_matrix(A_.data(),
+                                              A_.size(),
+                                              A_indices_.data(),
+                                              A_indices_.size(),
+                                              A_offsets_.data(),
+                                              A_offsets_.size());
+  }
+
+  // Set constraint bounds (RHS)
+  if (!b_.empty()) { data_model_view.set_constraint_bounds(b_.data(), b_.size()); }
+
+  // Set objective coefficients
+  if (!c_.empty()) { data_model_view.set_objective_coefficients(c_.data(), c_.size()); }
+
+  // Set objective scaling and offset
+  data_model_view.set_objective_scaling_factor(objective_scaling_factor_);
+  data_model_view.set_objective_offset(objective_offset_);
+
+  // Set variable bounds (each side only when it has data)
+  if (!variable_lower_bounds_.empty()) {
+    data_model_view.set_variable_lower_bounds(variable_lower_bounds_.data(),
+                                              variable_lower_bounds_.size());
+  }
+  if (!variable_upper_bounds_.empty()) {
+    data_model_view.set_variable_upper_bounds(variable_upper_bounds_.data(),
+                                              variable_upper_bounds_.size());
+  }
+
+  // Set row types (constraint types)
+  if (!row_types_.empty()) { data_model_view.set_row_types(row_types_.data(), row_types_.size()); }
+
+  // Set constraint bounds (lower and upper). These are forwarded only as a pair.
+  // NOTE(review): presumably the writer expects both sides together - confirm
+  // before relaxing this to independent checks.
+  if (!constraint_lower_bounds_.empty() && !constraint_upper_bounds_.empty()) {
+    data_model_view.set_constraint_lower_bounds(constraint_lower_bounds_.data(),
+                                                constraint_lower_bounds_.size());
+    data_model_view.set_constraint_upper_bounds(constraint_upper_bounds_.data(),
+                                                constraint_upper_bounds_.size());
+  }
+
+  // Set problem and variable names if available
+  if (!problem_name_.empty()) { data_model_view.set_problem_name(problem_name_); }
+  if (!objective_name_.empty()) { data_model_view.set_objective_name(objective_name_); }
+  if (!var_names_.empty()) { data_model_view.set_variable_names(var_names_); }
+  if (!row_names_.empty()) { data_model_view.set_row_names(row_names_); }
+
+  // Set variable types (convert from enum to char)
+  // CRITICAL: Declare var_types_char OUTSIDE the if block so it stays alive
+  // until after write_mps() is called, since data_model_view stores a span (pointer) to it
+  std::vector<char> var_types_char;
+  if (!variable_types_.empty()) {
+    var_types_char.resize(variable_types_.size());
+
+    // 'I' marks an integer variable; every other type is written as 'C'.
+    for (size_t i = 0; i < var_types_char.size(); ++i) {
+      var_types_char[i] = (variable_types_[i] == var_t::INTEGER) ? 'I' : 'C';
+    }
+
+    data_model_view.set_variable_types(var_types_char.data(), var_types_char.size());
+  }
+
+  cuopt::mps_parser::write_mps(data_model_view, mps_file_path);
+}
+
+// ==============================================================================
+// Comparison
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+bool cpu_optimization_problem_t<i_t, f_t>::is_equivalent(
+  const optimization_problem_interface_t<i_t, f_t>& other) const
+{
+  // Returns true when `other` matches this problem up to a name-based reordering of variables
+  // and rows; both sides must carry variable and row names. Row types, ranged constraint bounds
+  // and the entries of the constraint matrix (only its value count) are not compared.
+
+  // Compare scalar properties
+  if (maximize_ != other.get_sense()) return false;
+  if (n_vars_ != other.get_n_variables()) return false;
+  if (n_constraints_ != other.get_n_constraints()) return false;
+  if (objective_scaling_factor_ != other.get_objective_scaling_factor()) return false;
+  if (objective_offset_ != other.get_objective_offset()) return false;
+  if (problem_category_ != other.get_problem_category()) return false;
+
+  // Fetch host copies from `other`. Each array must have the same length on both sides and be
+  // either unset (empty) or long enough for the index loops below; anything else would make
+  // those loops read out of bounds.
+  const auto n_vars = static_cast<size_t>(n_vars_);
+  const auto n_rows = static_cast<size_t>(n_constraints_);
+  auto usable       = [](size_t mine, size_t theirs, size_t n) {
+    return mine == theirs && (mine == 0 || mine >= n);
+  };
+  auto other_c = other.get_objective_coefficients_host();
+  if (!usable(c_.size(), other_c.size(), n_vars)) return false;
+  auto other_var_lb = other.get_variable_lower_bounds_host();
+  if (!usable(variable_lower_bounds_.size(), other_var_lb.size(), n_vars)) return false;
+  auto other_var_ub = other.get_variable_upper_bounds_host();
+  if (!usable(variable_upper_bounds_.size(), other_var_ub.size(), n_vars)) return false;
+  auto other_var_types = other.get_variable_types_host();
+  if (!usable(variable_types_.size(), other_var_types.size(), n_vars)) return false;
+  auto other_b = other.get_constraint_bounds_host();
+  if (!usable(b_.size(), other_b.size(), n_rows)) return false;
+  auto other_A_values = other.get_constraint_matrix_values_host();
+  if (A_.size() != other_A_values.size()) return false;
+
+  // Names drive the permutation matching: both sides need them, one per variable and per row
+  const auto& other_var_names = other.get_variable_names();
+  const auto& other_row_names = other.get_row_names();
+  if (var_names_.empty() || other_var_names.empty() || var_names_.size() < n_vars) return false;
+  if (row_names_.empty() || other_row_names.empty() || row_names_.size() < n_rows) return false;
+
+  // Build variable permutation map
+  std::unordered_map<std::string, i_t> other_var_idx;
+  for (size_t j = 0; j < other_var_names.size(); ++j) {
+    other_var_idx[other_var_names[j]] = static_cast<i_t>(j);
+  }
+  std::vector<i_t> var_perm(n_vars_);
+  for (i_t i = 0; i < n_vars_; ++i) {
+    auto it = other_var_idx.find(var_names_[i]);
+    if (it == other_var_idx.end() || it->second >= n_vars_) return false;
+    var_perm[i] = it->second;
+  }
+
+  // Build row permutation map
+  std::unordered_map<std::string, i_t> other_row_idx;
+  for (size_t j = 0; j < other_row_names.size(); ++j) {
+    other_row_idx[other_row_names[j]] = static_cast<i_t>(j);
+  }
+  std::vector<i_t> row_perm(n_constraints_);
+  for (i_t i = 0; i < n_constraints_; ++i) {
+    auto it = other_row_idx.find(row_names_[i]);
+    if (it == other_row_idx.end() || it->second >= n_constraints_) return false;
+    row_perm[i] = it->second;
+  }
+
+  // True when a populated array differs at the matched positions; unset arrays never differ
+  auto differ = [](const auto& mine, const auto& theirs, i_t i, i_t j) {
+    return !mine.empty() && std::abs(mine[i] - theirs[j]) > 1e-9;
+  };
+  for (i_t i = 0; i < n_vars_; ++i) {
+    const i_t j = var_perm[i];
+    if (differ(c_, other_c, i, j)) return false;
+    if (differ(variable_lower_bounds_, other_var_lb, i, j)) return false;
+    if (differ(variable_upper_bounds_, other_var_ub, i, j)) return false;
+    if (!variable_types_.empty() && variable_types_[i] != other_var_types[j]) return false;
+  }
+
+  for (i_t i = 0; i < n_constraints_; ++i) {
+    if (differ(b_, other_b, i, row_perm[i])) return false;
+  }
+
+  return true;
+}
+
+// ==============================================================================
+// Remote Execution (Polymorphic Dispatch)
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>>
+cpu_optimization_problem_t<i_t, f_t>::solve_lp_remote(
+  pdlp_solver_settings_t<i_t, f_t> const& settings) const
+{
+  // The free solve_lp_remote() overload takes the problem by non-const reference, so the
+  // const qualifier of *this is dropped before dispatching to it.
+  using self_t = cpu_optimization_problem_t<i_t, f_t>;
+  return ::cuopt::linear_programming::solve_lp_remote(const_cast<self_t&>(*this), settings);
+}
+
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>>
+cpu_optimization_problem_t<i_t, f_t>::solve_mip_remote(
+  mip_solver_settings_t<i_t, f_t> const& settings) const
+{
+  // Dispatch to the free solve_mip_remote() overload; *this is passed as a non-const reference.
+  using self_t = cpu_optimization_problem_t<i_t, f_t>;
+  return ::cuopt::linear_programming::solve_mip_remote(const_cast<self_t&>(*this), settings);
+}
+
+// ==============================================================================
+// C API Support: Copy to Host (CPU Implementation)
+// ==============================================================================
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_objective_coefficients_to_host(f_t* output,
+                                                                               i_t size) const
+{
+  // c_ already lives in host memory: copy its first `size` entries straight into `output`
+  std::copy_n(c_.cbegin(), size, output);
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_constraint_matrix_to_host(
+  f_t* values, i_t* indices, i_t* offsets, i_t num_values, i_t num_indices, i_t num_offsets) const
+{
+  // Host-resident CSR arrays: each destination receives the requested leading element count
+  std::copy_n(A_.cbegin(), num_values, values);
+  std::copy_n(A_indices_.cbegin(), num_indices, indices);
+  std::copy_n(A_offsets_.cbegin(), num_offsets, offsets);
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_row_types_to_host(char* output, i_t size) const
+{
+  std::copy_n(row_types_.cbegin(), size, output);  // leading `size` row-type characters
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_constraint_bounds_to_host(f_t* output,
+                                                                          i_t size) const
+{
+  std::copy_n(b_.cbegin(), size, output);  // leading `size` entries of b_
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_constraint_lower_bounds_to_host(f_t* output,
+                                                                                i_t size) const
+{
+  std::copy_n(constraint_lower_bounds_.cbegin(), size, output);  // leading `size` entries
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_constraint_upper_bounds_to_host(f_t* output,
+                                                                                i_t size) const
+{
+  std::copy_n(constraint_upper_bounds_.cbegin(), size, output);  // leading `size` entries
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_variable_lower_bounds_to_host(f_t* output,
+                                                                              i_t size) const
+{
+  std::copy_n(variable_lower_bounds_.cbegin(), size, output);  // leading `size` entries
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_variable_upper_bounds_to_host(f_t* output,
+                                                                              i_t size) const
+{
+  std::copy_n(variable_upper_bounds_.cbegin(), size, output);  // leading `size` entries
+}
+
+template <typename i_t, typename f_t>
+void cpu_optimization_problem_t<i_t, f_t>::copy_variable_types_to_host(var_t* output,
+                                                                       i_t size) const
+{
+  std::copy_n(variable_types_.cbegin(), size, output);  // leading `size` var_t entries
+}
+
+// ==============================================================================
+// Template instantiations matching optimization_problem_t
+// ==============================================================================
+
+#if MIP_INSTANTIATE_FLOAT
+template class cpu_optimization_problem_t<int32_t, float>;
+#endif
+#if MIP_INSTANTIATE_DOUBLE
+template class cpu_optimization_problem_t<int32_t, double>;
+#endif
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/cpu_pdlp_warm_start_data.cu b/cpp/src/linear_programming/cpu_pdlp_warm_start_data.cu
new file mode 100644
index 000000000..5aafd579a
--- /dev/null
+++ b/cpp/src/linear_programming/cpu_pdlp_warm_start_data.cu
@@ -0,0 +1,117 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/pdlp/pdlp_warm_start_data.hpp>
+#include <raft/core/copy.hpp>
+
+namespace cuopt::linear_programming {
+
+// Download the contents of a device_uvector into a freshly allocated std::vector (D2H).
+// The stream is synchronized before returning; an empty input skips the copy entirely.
+template <typename T>
+std::vector<T> device_to_host_vector(const rmm::device_uvector<T>& device_vec,
+                                     rmm::cuda_stream_view stream)
+{
+  std::vector<T> result(device_vec.size());
+  if (result.empty()) { return result; }
+  raft::copy(result.data(), device_vec.data(), result.size(), stream);
+  stream.synchronize();
+  return result;
+}
+
+// Upload a std::vector into a newly allocated device_uvector on `stream` (H2D).
+// The stream is synchronized after the copy; an empty input yields a zero-length buffer.
+template <typename T>
+rmm::device_uvector<T> host_to_device_vector(const std::vector<T>& host_vec,
+                                             rmm::cuda_stream_view stream)
+{
+  rmm::device_uvector<T> result(host_vec.size(), stream);
+  if (host_vec.empty()) { return result; }
+  raft::copy(result.data(), host_vec.data(), host_vec.size(), stream);
+  stream.synchronize();
+  return result;
+}
+
+// Build a host-side copy of GPU warm-start data (device → host).
+template <typename i_t, typename f_t>
+cpu_pdlp_warm_start_data_t<i_t, f_t> convert_to_cpu_warmstart(
+  const pdlp_warm_start_data_t<i_t, f_t>& gpu_data, rmm::cuda_stream_view stream)
+{
+  // Every device buffer goes through the same download helper on the caller's stream.
+  const auto download = [stream](const auto& buf) { return device_to_host_vector(buf, stream); };
+  cpu_pdlp_warm_start_data_t<i_t, f_t> cpu_data;
+
+  // Scalar fields are assigned directly.
+  cpu_data.initial_primal_weight_         = gpu_data.initial_primal_weight_;
+  cpu_data.initial_step_size_             = gpu_data.initial_step_size_;
+  cpu_data.total_pdlp_iterations_         = gpu_data.total_pdlp_iterations_;
+  cpu_data.total_pdhg_iterations_         = gpu_data.total_pdhg_iterations_;
+  cpu_data.last_candidate_kkt_score_      = gpu_data.last_candidate_kkt_score_;
+  cpu_data.last_restart_kkt_score_        = gpu_data.last_restart_kkt_score_;
+  cpu_data.sum_solution_weight_           = gpu_data.sum_solution_weight_;
+  cpu_data.iterations_since_last_restart_ = gpu_data.iterations_since_last_restart_;
+
+  // Vector fields: one download per device buffer.
+  cpu_data.current_primal_solution_ = download(gpu_data.current_primal_solution_);
+  cpu_data.current_dual_solution_   = download(gpu_data.current_dual_solution_);
+  cpu_data.initial_primal_average_  = download(gpu_data.initial_primal_average_);
+  cpu_data.initial_dual_average_    = download(gpu_data.initial_dual_average_);
+  cpu_data.current_ATY_             = download(gpu_data.current_ATY_);
+  cpu_data.sum_primal_solutions_    = download(gpu_data.sum_primal_solutions_);
+  cpu_data.sum_dual_solutions_      = download(gpu_data.sum_dual_solutions_);
+  cpu_data.last_restart_duality_gap_primal_solution_ =
+    download(gpu_data.last_restart_duality_gap_primal_solution_);
+  cpu_data.last_restart_duality_gap_dual_solution_ =
+    download(gpu_data.last_restart_duality_gap_dual_solution_);
+
+  return cpu_data;
+}
+
+// Build device-side warm-start data from its host copy (host → device).
+template <typename i_t, typename f_t>
+pdlp_warm_start_data_t<i_t, f_t> convert_to_gpu_warmstart(
+  const cpu_pdlp_warm_start_data_t<i_t, f_t>& cpu_data, rmm::cuda_stream_view stream)
+{
+  // Every host vector goes through the same upload helper on the caller's stream.
+  const auto upload = [stream](const auto& vec) { return host_to_device_vector(vec, stream); };
+  pdlp_warm_start_data_t<i_t, f_t> gpu_data;
+
+  // Scalar fields are assigned directly.
+  gpu_data.initial_primal_weight_         = cpu_data.initial_primal_weight_;
+  gpu_data.initial_step_size_             = cpu_data.initial_step_size_;
+  gpu_data.total_pdlp_iterations_         = cpu_data.total_pdlp_iterations_;
+  gpu_data.total_pdhg_iterations_         = cpu_data.total_pdhg_iterations_;
+  gpu_data.last_candidate_kkt_score_      = cpu_data.last_candidate_kkt_score_;
+  gpu_data.last_restart_kkt_score_        = cpu_data.last_restart_kkt_score_;
+  gpu_data.sum_solution_weight_           = cpu_data.sum_solution_weight_;
+  gpu_data.iterations_since_last_restart_ = cpu_data.iterations_since_last_restart_;
+
+  // Vector fields: one upload per host vector.
+  gpu_data.current_primal_solution_ = upload(cpu_data.current_primal_solution_);
+  gpu_data.current_dual_solution_   = upload(cpu_data.current_dual_solution_);
+  gpu_data.initial_primal_average_  = upload(cpu_data.initial_primal_average_);
+  gpu_data.initial_dual_average_    = upload(cpu_data.initial_dual_average_);
+  gpu_data.current_ATY_             = upload(cpu_data.current_ATY_);
+  gpu_data.sum_primal_solutions_    = upload(cpu_data.sum_primal_solutions_);
+  gpu_data.sum_dual_solutions_      = upload(cpu_data.sum_dual_solutions_);
+  gpu_data.last_restart_duality_gap_primal_solution_ =
+    upload(cpu_data.last_restart_duality_gap_primal_solution_);
+  gpu_data.last_restart_duality_gap_dual_solution_ =
+    upload(cpu_data.last_restart_duality_gap_dual_solution_);
+
+  return gpu_data;
+}
+
+// Explicit template instantiations
+template cpu_pdlp_warm_start_data_t<int, double> convert_to_cpu_warmstart(
+  const pdlp_warm_start_data_t<int, double>&, rmm::cuda_stream_view);
+
+template pdlp_warm_start_data_t<int, double> convert_to_gpu_warmstart(
+  const cpu_pdlp_warm_start_data_t<int, double>&, rmm::cuda_stream_view);
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp
index 760f0eeca..2c954a78c 100644
--- a/cpp/src/linear_programming/cuopt_c.cpp
+++ b/cpp/src/linear_programming/cuopt_c.cpp
@@ -7,7 +7,10 @@
 
 #include <cuopt/linear_programming/cuopt_c.h>
 
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
 #include <cuopt/utilities/timestamp_utils.hpp>
@@ -98,7 +101,8 @@ cuopt_int_t cuOptGetVersion(cuopt_int_t* version_major,
 
 cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* problem_ptr)
 {
-  problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t();
+  problem_and_stream_view_t* problem_and_stream =
+    new problem_and_stream_view_t(get_memory_backend_type());
   std::string filename_str(filename);
   bool input_mps_strict = false;
   std::unique_ptr<mps_data_model_t<cuopt_int_t, cuopt_float_t>> mps_data_model_ptr;
@@ -107,6 +111,7 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro
       parse_mps<cuopt_int_t, cuopt_float_t>(filename_str, input_mps_strict));
   } catch (const std::exception& e) {
     CUOPT_LOG_INFO("Error parsing MPS file: %s", e.what());
+    delete problem_and_stream;
     *problem_ptr = nullptr;
     if (std::string(e.what()).find("Error opening MPS file") != std::string::npos) {
       return CUOPT_MPS_FILE_ERROR;
@@ -114,11 +119,11 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro
       return CUOPT_MPS_PARSE_ERROR;
     }
   }
-  optimization_problem_t<cuopt_int_t, cuopt_float_t>* op_problem =
-    new optimization_problem_t<cuopt_int_t, cuopt_float_t>(mps_data_model_to_optimization_problem(
-      problem_and_stream->get_handle_ptr(), *mps_data_model_ptr));
-  problem_and_stream->op_problem = op_problem;
-  *problem_ptr                   = static_cast<cuOptOptimizationProblem>(problem_and_stream);
+
+  // Populate interface directly from MPS data model (avoids temporary GPU allocation)
+  populate_from_mps_data_model(problem_and_stream->get_problem(), *mps_data_model_ptr);
+
+  *problem_ptr = static_cast<cuOptOptimizationProblem>(problem_and_stream);
   return CUOPT_SUCCESS;
 }
 
@@ -134,7 +139,8 @@ cuopt_int_t cuOptWriteProblem(cuOptOptimizationProblem problem,
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
   try {
-    problem_and_stream_view->op_problem->write_to_mps(std::string(filename));
+    // Use the write_to_mps method from the interface (works for both CPU and GPU)
+    problem_and_stream_view->get_problem()->write_to_mps(std::string(filename));
   } catch (const std::exception& e) {
     CUOPT_LOG_INFO("Error writing MPS file: %s", e.what());
     return CUOPT_MPS_FILE_ERROR;
@@ -167,33 +173,36 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints,
     return CUOPT_INVALID_ARGUMENT;
   }
 
-  problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t();
-  problem_and_stream->op_problem =
-    new optimization_problem_t<cuopt_int_t, cuopt_float_t>(problem_and_stream->get_handle_ptr());
+  problem_and_stream_view_t* problem_and_stream =
+    new problem_and_stream_view_t(get_memory_backend_type());
   try {
-    problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
-    problem_and_stream->op_problem->set_objective_offset(objective_offset);
-    problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients,
-                                                               num_variables);
+    auto* problem = problem_and_stream->get_problem();
+    problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
+    problem->set_objective_offset(objective_offset);
+    problem->set_objective_coefficients(objective_coefficients, num_variables);
     cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints];
-    problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
-                                                              nnz,
-                                                              constraint_matrix_column_indices,
-                                                              nnz,
-                                                              constraint_matrix_row_offsets,
-                                                              num_constraints + 1);
-    problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints);
-    problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints);
-    problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables);
-    problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables);
+    problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
+                                       nnz,
+                                       constraint_matrix_column_indices,
+                                       nnz,
+                                       constraint_matrix_row_offsets,
+                                       num_constraints + 1);
+    problem->set_row_types(constraint_sense, num_constraints);
+    problem->set_constraint_bounds(rhs, num_constraints);
+    problem->set_variable_lower_bounds(lower_bounds, num_variables);
+    problem->set_variable_upper_bounds(upper_bounds, num_variables);
+
+    // Set variable types (problem category is auto-detected)
     std::vector<var_t> variable_types_host(num_variables);
     for (int j = 0; j < num_variables; j++) {
       variable_types_host[j] =
         variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER;
     }
-    problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables);
+    problem->set_variable_types(variable_types_host.data(), num_variables);
+
     *problem_ptr = static_cast<cuOptOptimizationProblem>(problem_and_stream);
   } catch (const raft::exception& e) {
+    delete problem_and_stream;
     return CUOPT_INVALID_ARGUMENT;
   }
   return CUOPT_SUCCESS;
@@ -220,39 +229,48 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints,
       constraint_matrix_row_offsets == nullptr || constraint_matrix_column_indices == nullptr ||
       constraint_matrix_coefficent_values == nullptr || constraint_lower_bounds == nullptr ||
       constraint_upper_bounds == nullptr || variable_lower_bounds == nullptr ||
-      variable_upper_bounds == nullptr || variable_types == nullptr) {
+      variable_upper_bounds == nullptr) {
     return CUOPT_INVALID_ARGUMENT;
   }
 
-  problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t();
-  problem_and_stream->op_problem =
-    new optimization_problem_t<cuopt_int_t, cuopt_float_t>(problem_and_stream->get_handle_ptr());
+  problem_and_stream_view_t* problem_and_stream =
+    new problem_and_stream_view_t(get_memory_backend_type());
   try {
-    problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
-    problem_and_stream->op_problem->set_objective_offset(objective_offset);
-    problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients,
-                                                               num_variables);
+    auto* problem = problem_and_stream->get_problem();
+    problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
+    problem->set_objective_offset(objective_offset);
+    problem->set_objective_coefficients(objective_coefficients, num_variables);
     cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints];
-    problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
-                                                              nnz,
-                                                              constraint_matrix_column_indices,
-                                                              nnz,
-                                                              constraint_matrix_row_offsets,
-                                                              num_constraints + 1);
-    problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds,
-                                                                num_constraints);
-    problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds,
-                                                                num_constraints);
-    problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables);
-    problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables);
+    problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
+                                       nnz,
+                                       constraint_matrix_column_indices,
+                                       nnz,
+                                       constraint_matrix_row_offsets,
+                                       num_constraints + 1);
+    problem->set_constraint_lower_bounds(constraint_lower_bounds, num_constraints);
+    problem->set_constraint_upper_bounds(constraint_upper_bounds, num_constraints);
+    problem->set_variable_lower_bounds(variable_lower_bounds, num_variables);
+    problem->set_variable_upper_bounds(variable_upper_bounds, num_variables);
+
+    // Set variable types (NULL means all continuous)
+    // Problem category (LP/MIP/IP) is auto-detected by set_variable_types
     std::vector<var_t> variable_types_host(num_variables);
-    for (int j = 0; j < num_variables; j++) {
-      variable_types_host[j] =
-        variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER;
+    if (variable_types != nullptr) {
+      for (int j = 0; j < num_variables; j++) {
+        variable_types_host[j] =
+          variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER;
+      }
+    } else {
+      // Default to all continuous
+      for (int j = 0; j < num_variables; j++) {
+        variable_types_host[j] = var_t::CONTINUOUS;
+      }
     }
-    problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables);
+    problem->set_variable_types(variable_types_host.data(), num_variables);
+
     *problem_ptr = static_cast<cuOptOptimizationProblem>(problem_and_stream);
   } catch (const raft::exception& e) {
+    delete problem_and_stream;
     return CUOPT_INVALID_ARGUMENT;
   }
   return CUOPT_SUCCESS;
@@ -288,35 +306,37 @@ cuopt_int_t cuOptCreateQuadraticProblem(
     return CUOPT_INVALID_ARGUMENT;
   }
 
-  problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t();
-  problem_and_stream->op_problem =
-    new optimization_problem_t<cuopt_int_t, cuopt_float_t>(problem_and_stream->get_handle_ptr());
+  problem_and_stream_view_t* problem_and_stream =
+    new problem_and_stream_view_t(get_memory_backend_type());
   try {
-    problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
-    problem_and_stream->op_problem->set_objective_offset(objective_offset);
-    problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients,
-                                                               num_variables);
+    auto* problem = problem_and_stream->get_problem();
+    problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
+    problem->set_objective_offset(objective_offset);
+    problem->set_objective_coefficients(objective_coefficients, num_variables);
     cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables];
-    problem_and_stream->op_problem->set_quadratic_objective_matrix(
-      quadratic_objective_matrix_coefficent_values,
-      Q_nnz,
-      quadratic_objective_matrix_column_indices,
-      Q_nnz,
-      quadratic_objective_matrix_row_offsets,
-      num_variables + 1);
+    problem->set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values,
+                                            Q_nnz,
+                                            quadratic_objective_matrix_column_indices,
+                                            Q_nnz,
+                                            quadratic_objective_matrix_row_offsets,
+                                            num_variables + 1);
     cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints];
-    problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
-                                                              nnz,
-                                                              constraint_matrix_column_indices,
-                                                              nnz,
-                                                              constraint_matrix_row_offsets,
-                                                              num_constraints + 1);
-    problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints);
-    problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints);
-    problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables);
-    problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables);
+    problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
+                                       nnz,
+                                       constraint_matrix_column_indices,
+                                       nnz,
+                                       constraint_matrix_row_offsets,
+                                       num_constraints + 1);
+    problem->set_row_types(constraint_sense, num_constraints);
+    problem->set_constraint_bounds(rhs, num_constraints);
+    problem->set_variable_lower_bounds(lower_bounds, num_variables);
+    problem->set_variable_upper_bounds(upper_bounds, num_variables);
+
+    // Quadratic problems default to LP category (no variable types set, so no MIP detection)
+
     *problem_ptr = static_cast<cuOptOptimizationProblem>(problem_and_stream);
   } catch (const raft::exception& e) {
+    delete problem_and_stream;
     return CUOPT_INVALID_ARGUMENT;
   }
   return CUOPT_SUCCESS;
@@ -353,37 +373,37 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem(
     return CUOPT_INVALID_ARGUMENT;
   }
 
-  problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t();
-  problem_and_stream->op_problem =
-    new optimization_problem_t<cuopt_int_t, cuopt_float_t>(problem_and_stream->get_handle_ptr());
+  problem_and_stream_view_t* problem_and_stream =
+    new problem_and_stream_view_t(get_memory_backend_type());
   try {
-    problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
-    problem_and_stream->op_problem->set_objective_offset(objective_offset);
-    problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients,
-                                                               num_variables);
+    auto* problem = problem_and_stream->get_problem();
+    problem->set_maximize(objective_sense == CUOPT_MAXIMIZE);
+    problem->set_objective_offset(objective_offset);
+    problem->set_objective_coefficients(objective_coefficients, num_variables);
     cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables];
-    problem_and_stream->op_problem->set_quadratic_objective_matrix(
-      quadratic_objective_matrix_coefficent_values,
-      Q_nnz,
-      quadratic_objective_matrix_column_indices,
-      Q_nnz,
-      quadratic_objective_matrix_row_offsets,
-      num_variables + 1);
+    problem->set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values,
+                                            Q_nnz,
+                                            quadratic_objective_matrix_column_indices,
+                                            Q_nnz,
+                                            quadratic_objective_matrix_row_offsets,
+                                            num_variables + 1);
     cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints];
-    problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
-                                                              nnz,
-                                                              constraint_matrix_column_indices,
-                                                              nnz,
-                                                              constraint_matrix_row_offsets,
-                                                              num_constraints + 1);
-    problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds,
-                                                                num_constraints);
-    problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds,
-                                                                num_constraints);
-    problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables);
-    problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables);
+    problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values,
+                                       nnz,
+                                       constraint_matrix_column_indices,
+                                       nnz,
+                                       constraint_matrix_row_offsets,
+                                       num_constraints + 1);
+    problem->set_constraint_lower_bounds(constraint_lower_bounds, num_constraints);
+    problem->set_constraint_upper_bounds(constraint_upper_bounds, num_constraints);
+    problem->set_variable_lower_bounds(variable_lower_bounds, num_variables);
+    problem->set_variable_upper_bounds(variable_upper_bounds, num_variables);
+
+    // Quadratic problems default to LP category (no variable types set, so no MIP detection)
+
     *problem_ptr = static_cast<cuOptOptimizationProblem>(problem_and_stream);
   } catch (const raft::exception& e) {
+    delete problem_and_stream;
     return CUOPT_INVALID_ARGUMENT;
   }
   return CUOPT_SUCCESS;
@@ -404,7 +424,7 @@ cuopt_int_t cuOptGetNumConstraints(cuOptOptimizationProblem problem,
   if (num_constraints_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  *num_constraints_ptr = problem_and_stream_view->op_problem->get_n_constraints();
+  *num_constraints_ptr = problem_and_stream_view->get_problem()->get_n_constraints();
   return CUOPT_SUCCESS;
 }
 
@@ -414,7 +434,7 @@ cuopt_int_t cuOptGetNumVariables(cuOptOptimizationProblem problem, cuopt_int_t*
   if (num_variables_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  *num_variables_ptr = problem_and_stream_view->op_problem->get_n_variables();
+  *num_variables_ptr = problem_and_stream_view->get_problem()->get_n_variables();
   return CUOPT_SUCCESS;
 }
 
@@ -426,7 +446,7 @@ cuopt_int_t cuOptGetObjectiveSense(cuOptOptimizationProblem problem,
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
   *objective_sense_ptr =
-    problem_and_stream_view->op_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE;
+    problem_and_stream_view->get_problem()->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE;
   return CUOPT_SUCCESS;
 }
 
@@ -437,7 +457,7 @@ cuopt_int_t cuOptGetObjectiveOffset(cuOptOptimizationProblem problem,
   if (objective_offset_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  *objective_offset_ptr = problem_and_stream_view->op_problem->get_objective_offset();
+  *objective_offset_ptr = problem_and_stream_view->get_problem()->get_objective_offset();
   return CUOPT_SUCCESS;
 }
 
@@ -448,13 +468,11 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem,
   if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& objective_coefficients =
-    problem_and_stream_view->op_problem->get_objective_coefficients();
-  raft::copy(objective_coefficients_ptr,
-             objective_coefficients.data(),
-             objective_coefficients.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  // Fetch the backend problem once; its variable count is the number of elements copied.
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_objective_coefficients_to_host(objective_coefficients_ptr, prob->get_n_variables());
+
   return CUOPT_SUCCESS;
 }
 
@@ -465,7 +483,7 @@ cuopt_int_t cuOptGetNumNonZeros(cuOptOptimizationProblem problem,
   if (num_non_zero_elements_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  *num_non_zero_elements_ptr = problem_and_stream_view->op_problem->get_nnz();
+  *num_non_zero_elements_ptr = problem_and_stream_view->get_problem()->get_nnz();
   return CUOPT_SUCCESS;
 }
 
@@ -480,25 +498,18 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem,
   if (constraint_matrix_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& constraint_matrix_coefficients =
-    problem_and_stream_view->op_problem->get_constraint_matrix_values();
-  const rmm::device_uvector<cuopt_int_t>& constraint_matrix_column_indices =
-    problem_and_stream_view->op_problem->get_constraint_matrix_indices();
-  const rmm::device_uvector<cuopt_int_t>& constraint_matrix_row_offsets =
-    problem_and_stream_view->op_problem->get_constraint_matrix_offsets();
-  raft::copy(constraint_matrix_coefficients_ptr,
-             constraint_matrix_coefficients.data(),
-             constraint_matrix_coefficients.size(),
-             problem_and_stream_view->stream_view);
-  raft::copy(constraint_matrix_column_indices_ptr,
-             constraint_matrix_column_indices.data(),
-             constraint_matrix_column_indices.size(),
-             problem_and_stream_view->stream_view);
-  raft::copy(constraint_matrix_row_offsets_ptr,
-             constraint_matrix_row_offsets.data(),
-             constraint_matrix_row_offsets.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  auto* prob           = problem_and_stream_view->get_problem();
+  cuopt_int_t num_nnz  = prob->get_nnz();
+  cuopt_int_t num_rows = prob->get_n_constraints();
+
+  prob->copy_constraint_matrix_to_host(constraint_matrix_coefficients_ptr,
+                                       constraint_matrix_column_indices_ptr,
+                                       constraint_matrix_row_offsets_ptr,
+                                       num_nnz,
+                                       num_nnz,
+                                       num_rows + 1);
+
   return CUOPT_SUCCESS;
 }
 
@@ -508,13 +519,10 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons
   if (constraint_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<char>& constraint_sense =
-    problem_and_stream_view->op_problem->get_row_types();
-  raft::copy(constraint_sense_ptr,
-             constraint_sense.data(),
-             constraint_sense.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_row_types_to_host(constraint_sense_ptr, prob->get_n_constraints());
+
   return CUOPT_SUCCESS;
 }
 
@@ -525,10 +533,10 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem,
   if (rhs_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& rhs =
-    problem_and_stream_view->op_problem->get_constraint_bounds();
-  raft::copy(rhs_ptr, rhs.data(), rhs.size(), problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_constraint_bounds_to_host(rhs_ptr, prob->get_n_constraints());
+
   return CUOPT_SUCCESS;
 }
 
@@ -539,13 +547,11 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem,
   if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& lower_bounds =
-    problem_and_stream_view->op_problem->get_constraint_lower_bounds();
-  raft::copy(lower_bounds_ptr,
-             lower_bounds.data(),
-             lower_bounds.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  // Fetch the backend problem once; its constraint count is the number of elements copied.
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_constraint_lower_bounds_to_host(lower_bounds_ptr, prob->get_n_constraints());
+
   return CUOPT_SUCCESS;
 }
 
@@ -556,13 +562,11 @@ cuopt_int_t cuOptGetConstraintUpperBounds(cuOptOptimizationProblem problem,
   if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& upper_bounds =
-    problem_and_stream_view->op_problem->get_constraint_upper_bounds();
-  raft::copy(upper_bounds_ptr,
-             upper_bounds.data(),
-             upper_bounds.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  // Fetch the backend problem once; its constraint count is the number of elements copied.
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_constraint_upper_bounds_to_host(upper_bounds_ptr, prob->get_n_constraints());
+
   return CUOPT_SUCCESS;
 }
 
@@ -573,13 +577,11 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem,
   if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& lower_bounds =
-    problem_and_stream_view->op_problem->get_variable_lower_bounds();
-  raft::copy(lower_bounds_ptr,
-             lower_bounds.data(),
-             lower_bounds.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  // Fetch the backend problem once; its variable count is the number of elements copied.
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_variable_lower_bounds_to_host(lower_bounds_ptr, prob->get_n_variables());
+
   return CUOPT_SUCCESS;
 }
 
@@ -590,13 +592,11 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem,
   if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<cuopt_float_t>& upper_bounds =
-    problem_and_stream_view->op_problem->get_variable_upper_bounds();
-  raft::copy(upper_bounds_ptr,
-             upper_bounds.data(),
-             upper_bounds.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  // Fetch the backend problem once; its variable count is the number of elements copied.
+  auto* prob = problem_and_stream_view->get_problem();
+  prob->copy_variable_upper_bounds_to_host(upper_bounds_ptr, prob->get_n_variables());
+
   return CUOPT_SUCCESS;
 }
 
@@ -606,14 +606,13 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab
   if (variable_types_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  const rmm::device_uvector<var_t>& variable_types =
-    problem_and_stream_view->op_problem->get_variable_types();
-  std::vector<cuopt::linear_programming::var_t> variable_types_host(variable_types.size());
-  raft::copy(variable_types_host.data(),
-             variable_types.data(),
-             variable_types.size(),
-             problem_and_stream_view->stream_view);
-  problem_and_stream_view->stream_view.synchronize();
+
+  auto* prob               = problem_and_stream_view->get_problem();
+  const cuopt_int_t n_vars = prob->get_n_variables();
+  std::vector<cuopt::linear_programming::var_t> variable_types_host(n_vars);
+  prob->copy_variable_types_to_host(variable_types_host.data(), n_vars);
+
+  // Translate each var_t enum value into the C API's char encoding
   for (size_t j = 0; j < variable_types_host.size(); j++) {
     variable_types_ptr[j] =
       variable_types_host[j] == var_t::INTEGER ? CUOPT_INTEGER : CUOPT_CONTINUOUS;
@@ -841,9 +840,8 @@ cuopt_int_t cuOptIsMIP(cuOptOptimizationProblem problem, cuopt_int_t* is_mip_ptr
   if (is_mip_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  bool is_mip =
-    (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP) ||
-    (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP);
+  problem_category_t category = problem_and_stream_view->get_problem()->get_problem_category();
+  bool is_mip = (category == problem_category_t::MIP) || (category == problem_category_t::IP);
   *is_mip_ptr = static_cast<cuopt_int_t>(is_mip);
   return CUOPT_SUCCESS;
 }
@@ -857,44 +855,64 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem,
   if (problem == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   if (settings == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   if (solution_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
+
   problem_and_stream_view_t* problem_and_stream_view =
     static_cast<problem_and_stream_view_t*>(problem);
-  if (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP ||
-      problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP) {
-    solver_settings_t<cuopt_int_t, cuopt_float_t>* solver_settings =
-      get_settings_handle(settings)->settings;
-    mip_solver_settings_t<cuopt_int_t, cuopt_float_t>& mip_settings =
-      solver_settings->get_mip_settings();
-    optimization_problem_t<cuopt_int_t, cuopt_float_t>* op_problem =
-      problem_and_stream_view->op_problem;
-    solution_and_stream_view_t* solution_and_stream_view =
-      new solution_and_stream_view_t(true, problem_and_stream_view->stream_view);
-    solution_and_stream_view->mip_solution_ptr = new mip_solution_t<cuopt_int_t, cuopt_float_t>(
-      solve_mip<cuopt_int_t, cuopt_float_t>(*op_problem, mip_settings));
-    *solution_ptr = static_cast<cuOptSolution>(solution_and_stream_view);
-
-    cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN");
-
-    return static_cast<cuopt_int_t>(
-      solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type());
-  } else {
-    solver_settings_t<cuopt_int_t, cuopt_float_t>* solver_settings =
-      get_settings_handle(settings)->settings;
-    pdlp_solver_settings_t<cuopt_int_t, cuopt_float_t>& pdlp_settings =
-      solver_settings->get_pdlp_settings();
-    optimization_problem_t<cuopt_int_t, cuopt_float_t>* op_problem =
-      problem_and_stream_view->op_problem;
-    solution_and_stream_view_t* solution_and_stream_view =
-      new solution_and_stream_view_t(false, problem_and_stream_view->stream_view);
-    solution_and_stream_view->lp_solution_ptr =
-      new optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>(
-        solve_lp<cuopt_int_t, cuopt_float_t>(*op_problem, pdlp_settings));
-    *solution_ptr = static_cast<cuOptSolution>(solution_and_stream_view);
-
-    cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN");
-
-    return static_cast<cuopt_int_t>(
-      solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type());
+
+  // Error returns below produce no solution: hand back a null handle rather than garbage.
+  *solution_ptr = nullptr;
+
+  // Get the problem interface (GPU or CPU backed)
+  auto* problem_interface = problem_and_stream_view->get_problem();
+
+  try {
+    const problem_category_t category = problem_interface->get_problem_category();
+    solver_settings_t<cuopt_int_t, cuopt_float_t>* solver_settings =
+      get_settings_handle(settings)->settings;
+    if (category == problem_category_t::MIP || category == problem_category_t::IP) {
+      mip_solver_settings_t<cuopt_int_t, cuopt_float_t>& mip_settings =
+        solver_settings->get_mip_settings();
+
+      // Solve returns unique_ptr<mip_solution_interface_t>
+      auto solution_interface =
+        solve_mip<cuopt_int_t, cuopt_float_t>(problem_interface, mip_settings);
+
+      // Allocate the handle first, then release() into it, so a failed `new` cannot leak
+      solution_and_stream_view_t* solution_and_stream_view =
+        new solution_and_stream_view_t(true, problem_and_stream_view->memory_backend);
+      solution_and_stream_view->mip_solution_interface_ptr = solution_interface.release();
+      *solution_ptr = static_cast<cuOptSolution>(solution_and_stream_view);
+
+      cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN");
+
+      return static_cast<cuopt_int_t>(
+        solution_and_stream_view->mip_solution_interface_ptr->get_error_status().get_error_type());
+    } else {
+      pdlp_solver_settings_t<cuopt_int_t, cuopt_float_t>& pdlp_settings =
+        solver_settings->get_pdlp_settings();
+
+      // Solve returns unique_ptr<lp_solution_interface_t>
+      auto solution_interface =
+        solve_lp<cuopt_int_t, cuopt_float_t>(problem_interface, pdlp_settings);
+
+      // Allocate the handle first, then release() into it, so a failed `new` cannot leak
+      solution_and_stream_view_t* solution_and_stream_view =
+        new solution_and_stream_view_t(false, problem_and_stream_view->memory_backend);
+      solution_and_stream_view->lp_solution_interface_ptr = solution_interface.release();
+      *solution_ptr = static_cast<cuOptSolution>(solution_and_stream_view);
+
+      cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN");
+
+      return static_cast<cuopt_int_t>(
+        solution_and_stream_view->lp_solution_interface_ptr->get_error_status().get_error_type());
+    }
+  } catch (const cuopt::logic_error& e) {
+    // Remote execution not yet implemented or other logic errors; *solution_ptr stays null
+    CUOPT_LOG_ERROR("Solve failed: %s", e.what());
+    return static_cast<cuopt_int_t>(e.get_error_type());
+  } catch (const std::exception& e) {
+    CUOPT_LOG_ERROR("Solve failed with exception: %s", e.what());
+    return CUOPT_RUNTIME_ERROR;
   }
 }
 
@@ -904,17 +922,7 @@ void cuOptDestroySolution(cuOptSolution* solution_ptr)
   if (*solution_ptr == nullptr) { return; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(*solution_ptr);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    delete mip_solution;
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    delete optimization_problem_solution;
-  }
+  // Destructor handles cleanup of interface pointers
   delete solution_and_stream_view;
   *solution_ptr = nullptr;
 }
@@ -925,18 +933,8 @@ cuopt_int_t cuOptGetTerminationStatus(cuOptSolution solution, cuopt_int_t* termi
   if (termination_status_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    *termination_status_ptr = static_cast<cuopt_int_t>(mip_solution->get_termination_status());
-  } else {
-    pdlp_termination_status_t termination_status =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr)
-        ->get_termination_status();
-    *termination_status_ptr = static_cast<cuopt_int_t>(termination_status);
-  }
+  *termination_status_ptr = static_cast<cuopt_int_t>(
+    solution_and_stream_view->get_solution()->get_termination_status_int());
   return CUOPT_SUCCESS;
 }
 
@@ -946,13 +944,8 @@ cuopt_int_t cuOptGetErrorStatus(cuOptSolution solution, cuopt_int_t* error_statu
   if (error_status_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    *error_status_ptr = static_cast<cuopt_int_t>(
-      solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type());
-  } else {
-    *error_status_ptr = static_cast<cuopt_int_t>(
-      solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type());
-  }
+  *error_status_ptr = static_cast<cuopt_int_t>(
+    solution_and_stream_view->get_solution()->get_error_status().get_error_type());
   return CUOPT_SUCCESS;
 }
 
@@ -964,14 +957,8 @@ cuopt_int_t cuOptGetErrorString(cuOptSolution solution,
   if (error_string_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    std::string error_string =
-      solution_and_stream_view->mip_solution_ptr->get_error_status().what();
-    std::snprintf(error_string_ptr, error_string_size, "%s", error_string.c_str());
-  } else {
-    std::string error_string = solution_and_stream_view->lp_solution_ptr->get_error_status().what();
-    std::snprintf(error_string_ptr, error_string_size, "%s", error_string.c_str());
-  }
+  std::string error_string = solution_and_stream_view->get_solution()->get_error_status().what();
+  std::snprintf(error_string_ptr, error_string_size, "%s", error_string.c_str());
   return CUOPT_SUCCESS;
 }
 
@@ -981,29 +968,10 @@ cuopt_int_t cuOptGetPrimalSolution(cuOptSolution solution, cuopt_float_t* soluti
   if (solution_values_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    const rmm::device_uvector<cuopt_float_t>& solution_values = mip_solution->get_solution();
-    rmm::cuda_stream_view stream_view{};
-    raft::copy(solution_values_ptr,
-               solution_values.data(),
-               solution_values.size(),
-               solution_and_stream_view->stream_view);
-    solution_and_stream_view->stream_view.synchronize();
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    const rmm::device_uvector<cuopt_float_t>& solution_values =
-      optimization_problem_solution->get_primal_solution();
-    raft::copy(solution_values_ptr,
-               solution_values.data(),
-               solution_values.size(),
-               solution_and_stream_view->stream_view);
-    solution_and_stream_view->stream_view.synchronize();
-  }
+
+  const auto& solution_host = solution_and_stream_view->get_solution()->get_solution_host();
+  std::memcpy(
+    solution_values_ptr, solution_host.data(), solution_host.size() * sizeof(cuopt_float_t));
   return CUOPT_SUCCESS;
 }
 
@@ -1013,17 +981,7 @@ cuopt_int_t cuOptGetObjectiveValue(cuOptSolution solution, cuopt_float_t* object
   if (objective_value_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    *objective_value_ptr = mip_solution->get_objective_value();
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    *objective_value_ptr = optimization_problem_solution->get_objective_value();
-  }
+  *objective_value_ptr = solution_and_stream_view->get_solution()->get_objective_value();
   return CUOPT_SUCCESS;
 }
 
@@ -1033,17 +991,7 @@ cuopt_int_t cuOptGetSolveTime(cuOptSolution solution, cuopt_float_t* solve_time_
   if (solve_time_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    *solve_time_ptr = mip_solution->get_total_solve_time();
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    *solve_time_ptr = (optimization_problem_solution->get_solve_time());
-  }
+  *solve_time_ptr = solution_and_stream_view->get_solution()->get_solve_time();
   return CUOPT_SUCCESS;
 }
 
@@ -1053,15 +1001,12 @@ cuopt_int_t cuOptGetMIPGap(cuOptSolution solution, cuopt_float_t* mip_gap_ptr)
   if (mip_gap_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    *mip_gap_ptr = mip_solution->get_mip_gap();
-  } else {
+  try {
+    *mip_gap_ptr = solution_and_stream_view->get_solution()->get_mip_gap();
+    return CUOPT_SUCCESS;
+  } catch (const std::logic_error&) {
     return CUOPT_INVALID_ARGUMENT;
   }
-  return CUOPT_SUCCESS;
 }
 
 cuopt_int_t cuOptGetSolutionBound(cuOptSolution solution, cuopt_float_t* solution_bound_ptr)
@@ -1070,15 +1015,12 @@ cuopt_int_t cuOptGetSolutionBound(cuOptSolution solution, cuopt_float_t* solutio
   if (solution_bound_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution =
-      static_cast<mip_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->mip_solution_ptr);
-    *solution_bound_ptr = mip_solution->get_solution_bound();
-  } else {
+  try {
+    *solution_bound_ptr = solution_and_stream_view->get_solution()->get_solution_bound();
+    return CUOPT_SUCCESS;
+  } catch (const std::logic_error&) {
     return CUOPT_INVALID_ARGUMENT;
   }
-  return CUOPT_SUCCESS;
 }
 
 cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_solution_ptr)
@@ -1087,20 +1029,12 @@ cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_sol
   if (dual_solution_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    return CUOPT_INVALID_ARGUMENT;
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    const rmm::device_uvector<cuopt_float_t>& dual_solution =
-      optimization_problem_solution->get_dual_solution();
-    raft::copy(dual_solution_ptr,
-               dual_solution.data(),
-               dual_solution.size(),
-               solution_and_stream_view->stream_view);
-    solution_and_stream_view->stream_view.synchronize();
+  try {
+    const auto& dual_host = solution_and_stream_view->get_solution()->get_dual_solution();
+    std::memcpy(dual_solution_ptr, dual_host.data(), dual_host.size() * sizeof(cuopt_float_t));
     return CUOPT_SUCCESS;
+  } catch (const std::logic_error&) {
+    return CUOPT_INVALID_ARGUMENT;
   }
 }
 
@@ -1111,14 +1045,12 @@ cuopt_int_t cuOptGetDualObjectiveValue(cuOptSolution solution,
   if (dual_objective_value_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    return CUOPT_INVALID_ARGUMENT;
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    *dual_objective_value_ptr = optimization_problem_solution->get_dual_objective_value();
+  try {
+    *dual_objective_value_ptr =
+      solution_and_stream_view->get_solution()->get_dual_objective_value();
     return CUOPT_SUCCESS;
+  } catch (const std::logic_error&) {
+    return CUOPT_INVALID_ARGUMENT;
   }
 }
 
@@ -1128,19 +1060,12 @@ cuopt_int_t cuOptGetReducedCosts(cuOptSolution solution, cuopt_float_t* reduced_
   if (reduced_cost_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; }
   solution_and_stream_view_t* solution_and_stream_view =
     static_cast<solution_and_stream_view_t*>(solution);
-  if (solution_and_stream_view->is_mip) {
-    return CUOPT_INVALID_ARGUMENT;
-  } else {
-    optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* optimization_problem_solution =
-      static_cast<optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>*>(
-        solution_and_stream_view->lp_solution_ptr);
-    const rmm::device_uvector<cuopt_float_t>& reduced_cost =
-      optimization_problem_solution->get_reduced_cost();
-    raft::copy(reduced_cost_ptr,
-               reduced_cost.data(),
-               reduced_cost.size(),
-               solution_and_stream_view->stream_view);
-    solution_and_stream_view->stream_view.synchronize();
+  try {
+    const auto& reduced_cost_host = solution_and_stream_view->get_solution()->get_reduced_costs();
+    std::memcpy(
+      reduced_cost_ptr, reduced_cost_host.data(), reduced_cost_host.size() * sizeof(cuopt_float_t));
     return CUOPT_SUCCESS;
+  } catch (const std::logic_error&) {
+    return CUOPT_INVALID_ARGUMENT;
   }
 }
diff --git a/cpp/src/linear_programming/cuopt_c_internal.hpp b/cpp/src/linear_programming/cuopt_c_internal.hpp
index de9d6e559..c6de047c2 100644
--- a/cpp/src/linear_programming/cuopt_c_internal.hpp
+++ b/cpp/src/linear_programming/cuopt_c_internal.hpp
@@ -10,6 +10,8 @@
 #include <cuopt/linear_programming/cuopt_c.h>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_solution_interface.hpp>
 #include <cuopt/linear_programming/pdlp/solver_solution.hpp>
 
 #include <raft/core/handle.hpp>
@@ -19,28 +21,91 @@
 namespace cuopt::linear_programming {
 
 struct problem_and_stream_view_t {
-  problem_and_stream_view_t()
-    : op_problem(nullptr), stream_view(rmm::cuda_stream_per_thread), handle(stream_view)
+  /**
+   * @brief Allocate the backend-specific problem object.
+   *
+   * For the GPU backend a stream view, a raft handle built from it, and a GPU problem built
+   * from that handle are allocated. For the CPU backend only a CPU problem is allocated and
+   * the stream/handle pointers stay null.
+   *
+   * @param mem_backend Memory backend that selects which problem implementation is created
+   */
+  problem_and_stream_view_t(memory_backend_t mem_backend)
+    : memory_backend(mem_backend),
+      gpu_problem(nullptr),
+      cpu_problem(nullptr),
+      stream_view_ptr(nullptr),
+      handle_ptr(nullptr)
   {
+    try {
+      if (mem_backend == memory_backend_t::GPU) {
+        // GPU memory backend: Allocate CUDA resources
+        stream_view_ptr = new rmm::cuda_stream_view(rmm::cuda_stream_per_thread);
+        handle_ptr      = new raft::handle_t(*stream_view_ptr);
+        gpu_problem     = new gpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>(handle_ptr);
+      } else {
+        // CPU memory backend: No CUDA resources allocated (for remote execution on CPU-only hosts)
+        cpu_problem = new cpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>(nullptr);
+      }
+    } catch (...) {
+      // The destructor never runs for a partially constructed object, so anything allocated
+      // before the failing step has to be released here before the exception propagates.
+      release_owned();
+      throw;
+    }
   }
-  raft::handle_t* get_handle_ptr() { return &handle; }
-  optimization_problem_t<cuopt_int_t, cuopt_float_t>* op_problem;
-  rmm::cuda_stream_view stream_view;
-  raft::handle_t handle;
+
+  // The struct owns its raw pointers; an implicit copy would delete them twice.
+  problem_and_stream_view_t(const problem_and_stream_view_t&)            = delete;
+  problem_and_stream_view_t& operator=(const problem_and_stream_view_t&) = delete;
+
+  ~problem_and_stream_view_t() { release_owned(); }
+
+  /** @brief Raft handle owned by this object; nullptr for the CPU memory backend. */
+  raft::handle_t* get_handle_ptr() { return handle_ptr; }
+
+  /**
+   * @brief Get the active problem through the common interface type.
+   * @return gpu_problem for the GPU backend, cpu_problem otherwise
+   */
+  optimization_problem_interface_t<cuopt_int_t, cuopt_float_t>* get_problem()
+  {
+    return memory_backend == memory_backend_t::GPU
+             ? static_cast<optimization_problem_interface_t<cuopt_int_t, cuopt_float_t>*>(
+                 gpu_problem)
+             : static_cast<optimization_problem_interface_t<cuopt_int_t, cuopt_float_t>*>(
+                 cpu_problem);
+  }
+
+  /**
+   * @brief Convert the active problem by forwarding to its own to_optimization_problem().
+   * @return The optimization_problem_t produced by the backend-specific problem
+   */
+  optimization_problem_t<cuopt_int_t, cuopt_float_t> to_optimization_problem()
+  {
+    if (memory_backend == memory_backend_t::GPU) {
+      return gpu_problem->to_optimization_problem();
+    } else {
+      return cpu_problem->to_optimization_problem();
+    }
+  }
+
+  memory_backend_t memory_backend;
+  gpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>* gpu_problem;
+  cpu_optimization_problem_t<cuopt_int_t, cuopt_float_t>* cpu_problem;
+  rmm::cuda_stream_view*
+    stream_view_ptr;           // nullptr for CPU memory backend to avoid CUDA initialization
+  raft::handle_t* handle_ptr;  // nullptr for CPU memory backend to avoid CUDA initialization
+
+ private:
+  // Delete every owned object. The problem objects go first because the GPU problem was
+  // constructed from handle_ptr, which in turn was constructed from stream_view_ptr.
+  // Deleting a null pointer is a no-op, so no guards are needed.
+  void release_owned() noexcept
+  {
+    delete gpu_problem;
+    delete cpu_problem;
+    delete handle_ptr;
+    delete stream_view_ptr;
+    gpu_problem     = nullptr;
+    cpu_problem     = nullptr;
+    handle_ptr      = nullptr;
+    stream_view_ptr = nullptr;
+  }
 };
 
 struct solution_and_stream_view_t {
-  solution_and_stream_view_t(bool solution_for_mip, rmm::cuda_stream_view stream_view)
+  /**
+   * @brief Create an empty holder; both solution pointers start as nullptr.
+   * @param solution_for_mip Selects which of the two solution pointers get_solution() returns
+   * @param mem_backend      Memory backend recorded for later data access
+   */
+  solution_and_stream_view_t(bool solution_for_mip, memory_backend_t mem_backend)
     : is_mip(solution_for_mip),
-      mip_solution_ptr(nullptr),
-      lp_solution_ptr(nullptr),
-      stream_view(stream_view)
+      mip_solution_interface_ptr(nullptr),
+      lp_solution_interface_ptr(nullptr),
+      memory_backend(mem_backend)
+  {
+  }
+
+  // The struct owns both solution pointers; an implicit copy would delete them twice.
+  solution_and_stream_view_t(const solution_and_stream_view_t&)            = delete;
+  solution_and_stream_view_t& operator=(const solution_and_stream_view_t&) = delete;
+
+  // NOTE(review): the solutions are deleted through their interface types, which assumes
+  // those interfaces declare virtual destructors - confirm in the interface header.
+  ~solution_and_stream_view_t()
   {
+    // Deleting a null pointer is a no-op, so no guards are needed.
+    delete mip_solution_interface_ptr;
+    delete lp_solution_interface_ptr;
   }
+
+  /**
+   * @brief Get the solution as base interface pointer
+   * @return mip_solution_interface_ptr when is_mip is true, lp_solution_interface_ptr
+   *         otherwise; the result is nullptr if the selected pointer was never assigned
+   * @note Allows uniform access to get_solution_host(), get_error_status(), get_solve_time()
+   */
+  optimization_problem_solution_interface_t<cuopt_int_t, cuopt_float_t>* get_solution()
+  {
+    return is_mip
+             ? static_cast<optimization_problem_solution_interface_t<cuopt_int_t, cuopt_float_t>*>(
+                 mip_solution_interface_ptr)
+             : static_cast<optimization_problem_solution_interface_t<cuopt_int_t, cuopt_float_t>*>(
+                 lp_solution_interface_ptr);
+  }
+
   bool is_mip;
-  mip_solution_t<cuopt_int_t, cuopt_float_t>* mip_solution_ptr;
-  optimization_problem_solution_t<cuopt_int_t, cuopt_float_t>* lp_solution_ptr;
-  rmm::cuda_stream_view stream_view;
+  mip_solution_interface_t<cuopt_int_t, cuopt_float_t>* mip_solution_interface_ptr;
+  lp_solution_interface_t<cuopt_int_t, cuopt_float_t>* lp_solution_interface_ptr;
+  memory_backend_t memory_backend;  // Track if GPU or CPU memory for data access
 };
 
 }  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/gpu_optimization_problem.cu b/cpp/src/linear_programming/gpu_optimization_problem.cu
new file mode 100644
index 000000000..420daf715
--- /dev/null
+++ b/cpp/src/linear_programming/gpu_optimization_problem.cu
@@ -0,0 +1,962 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/solve_remote.hpp>
+
+#include <mip/mip_constants.hpp>
+#include <mps_parser/writer.hpp>
+#include <utilities/logger.hpp>
+
+#include <raft/core/copy.hpp>
+#include <raft/core/cuda_support.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/util/cudart_utils.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <thrust/copy.h>
+#include <thrust/count.h>
+#include <thrust/execution_policy.h>
+
+#include <cmath>
+#include <stdexcept>
+#include <unordered_map>
+
+namespace cuopt::linear_programming {
+
+// ==============================================================================
+// Constructor
+// ==============================================================================
+
+// Construct an empty GPU-backed problem bound to `handle_ptr`.
+// The stream view is read from the handle and every device vector is created with zero
+// elements on that stream. An INFO log line is emitted on each construction.
+// NOTE(review): `handle_ptr` is dereferenced in the initializer list without a null check -
+// confirm that every caller passes a valid handle.
+// NOTE(review): the vectors are initialized from `stream_view_`, which assumes that member
+// is declared before them in the class - confirm against the header.
+template <typename i_t, typename f_t>
+gpu_optimization_problem_t<i_t, f_t>::gpu_optimization_problem_t(raft::handle_t const* handle_ptr)
+  : handle_ptr_(handle_ptr),
+    stream_view_(handle_ptr->get_stream()),
+    A_(0, stream_view_),
+    A_indices_(0, stream_view_),
+    A_offsets_(0, stream_view_),
+    b_(0, stream_view_),
+    c_(0, stream_view_),
+    variable_lower_bounds_(0, stream_view_),
+    variable_upper_bounds_(0, stream_view_),
+    constraint_lower_bounds_(0, stream_view_),
+    constraint_upper_bounds_(0, stream_view_),
+    row_types_(0, stream_view_),
+    variable_types_(0, stream_view_)
+{
+  CUOPT_LOG_INFO("gpu_optimization_problem_t constructor: Using GPU backend");
+}
+
+// ==============================================================================
+// Setters
+// ==============================================================================
+
+// Store the optimization-sense flag; the value is returned unchanged by get_sense().
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_maximize(bool maximize)
+{
+  maximize_ = maximize;
+}
+
+/**
+ * @brief Copy a CSR constraint matrix into the device vectors A_, A_indices_ and A_offsets_.
+ *
+ * Each vector is resized to the given element count and filled with raft::copy on
+ * stream_view_. No stream synchronization is issued by this function.
+ *
+ * @param A_values     Source array of matrix values
+ * @param size_values  Number of elements in A_values
+ * @param A_indices    Source array of column indices
+ * @param size_indices Number of elements in A_indices
+ * @param A_offsets    Source array of row offsets
+ * @param size_offsets Number of elements in A_offsets; the constraint count is derived
+ *                     from it as size_offsets - 1
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_csr_constraint_matrix(const f_t* A_values,
+                                                                     i_t size_values,
+                                                                     const i_t* A_indices,
+                                                                     i_t size_indices,
+                                                                     const i_t* A_offsets,
+                                                                     i_t size_offsets)
+{
+  // An empty offsets array must give zero constraints rather than -1: a negative count
+  // would defeat the `n_constraints_ == 0` test in empty() and is later used as a
+  // container size.
+  n_constraints_ = size_offsets > 0 ? size_offsets - 1 : 0;
+
+  A_.resize(size_values, stream_view_);
+  A_indices_.resize(size_indices, stream_view_);
+  A_offsets_.resize(size_offsets, stream_view_);
+
+  raft::copy(A_.data(), A_values, size_values, stream_view_);
+  raft::copy(A_indices_.data(), A_indices, size_indices, stream_view_);
+  raft::copy(A_offsets_.data(), A_offsets, size_offsets, stream_view_);
+}
+
+// Replace the contents of b_ with `size` elements read from `b`.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_constraint_bounds(const f_t* b, i_t size)
+{
+  auto& rhs = b_;
+  rhs.resize(size, stream_view_);
+  raft::copy(rhs.data(), b, size, stream_view_);
+}
+
+// Replace the contents of c_ with `size` elements read from `c` and record `size` as the
+// number of variables. The copy is issued on stream_view_ without a stream synchronization.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_objective_coefficients(const f_t* c, i_t size)
+{
+  auto& objective = c_;
+  objective.resize(size, stream_view_);
+  raft::copy(objective.data(), c, size, stream_view_);
+  n_vars_ = size;
+}
+
+// Store the objective scaling factor; returned unchanged by get_objective_scaling_factor().
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_objective_scaling_factor(
+  f_t objective_scaling_factor)
+{
+  objective_scaling_factor_ = objective_scaling_factor;
+}
+
+// Store the objective offset; returned unchanged by get_objective_offset().
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_objective_offset(f_t objective_offset)
+{
+  objective_offset_ = objective_offset;
+}
+
+// Store the quadratic objective matrix (values, indices, offsets) in the host-side vectors
+// Q_values_, Q_indices_ and Q_offsets_, replacing any previous contents.
+// NOTE(review): `validate_positive_semi_definite` is not read by this implementation -
+// confirm whether validation is expected to happen elsewhere.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
+  const f_t* Q_values,
+  i_t size_values,
+  const i_t* Q_indices,
+  i_t size_indices,
+  const i_t* Q_offsets,
+  i_t size_offsets,
+  bool validate_positive_semi_definite)
+{
+  Q_values_.assign(Q_values, Q_values + size_values);
+  Q_indices_.assign(Q_indices, Q_indices + size_indices);
+  Q_offsets_.assign(Q_offsets, Q_offsets + size_offsets);
+}
+
+// Replace the contents of variable_lower_bounds_ with `size` elements read from the input.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_variable_lower_bounds(
+  const f_t* variable_lower_bounds, i_t size)
+{
+  auto& lower = variable_lower_bounds_;
+  lower.resize(size, stream_view_);
+  raft::copy(lower.data(), variable_lower_bounds, size, stream_view_);
+}
+
+// Replace the contents of variable_upper_bounds_ with `size` elements read from the input.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_variable_upper_bounds(
+  const f_t* variable_upper_bounds, i_t size)
+{
+  auto& upper = variable_upper_bounds_;
+  upper.resize(size, stream_view_);
+  raft::copy(upper.data(), variable_upper_bounds, size, stream_view_);
+}
+
+/**
+ * @brief Copy the variable types to the device and update the problem category from them.
+ *
+ * variable_types_ is resized to `size` and filled on stream_view_. The number of INTEGER
+ * entries is then counted with the handle's thrust policy:
+ *   - all entries INTEGER  -> problem_category_ becomes IP
+ *   - some entries INTEGER -> problem_category_ becomes MIP
+ *   - no entries INTEGER   -> problem_category_ is left unchanged
+ * An empty input (`size` <= 0) never changes problem_category_.
+ *
+ * @param variable_types Source array of variable types
+ * @param size           Number of elements in variable_types
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_variable_types(const var_t* variable_types, i_t size)
+{
+  variable_types_.resize(size, stream_view_);
+  raft::copy(variable_types_.data(), variable_types, size, stream_view_);
+
+  // With no entries `n_integer == size` would hold trivially (0 == 0) and an empty type
+  // array would classify the problem as IP, so there is nothing to classify here.
+  if (size <= 0) { return; }
+
+  // Auto-detect problem category based on variable types (matching original optimization_problem_t)
+  i_t n_integer = thrust::count_if(handle_ptr_->get_thrust_policy(),
+                                   variable_types_.begin(),
+                                   variable_types_.end(),
+                                   [] __device__(auto val) { return val == var_t::INTEGER; });
+  // By default it is LP
+  if (n_integer == size) {
+    problem_category_ = problem_category_t::IP;
+  } else if (n_integer > 0) {
+    problem_category_ = problem_category_t::MIP;
+  }
+}
+
+// Store the problem category; returned unchanged by get_problem_category().
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_problem_category(const problem_category_t& category)
+{
+  problem_category_ = category;
+}
+
+// Replace the contents of constraint_lower_bounds_ with `size` elements read from the input.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_constraint_lower_bounds(
+  const f_t* constraint_lower_bounds, i_t size)
+{
+  auto& lower = constraint_lower_bounds_;
+  lower.resize(size, stream_view_);
+  raft::copy(lower.data(), constraint_lower_bounds, size, stream_view_);
+}
+
+// Replace the contents of constraint_upper_bounds_ with `size` elements read from the input.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_constraint_upper_bounds(
+  const f_t* constraint_upper_bounds, i_t size)
+{
+  auto& upper = constraint_upper_bounds_;
+  upper.resize(size, stream_view_);
+  raft::copy(upper.data(), constraint_upper_bounds, size, stream_view_);
+}
+
+// Replace the contents of row_types_ with `size` characters read from `row_types`.
+// The copy is issued on stream_view_; this function does not synchronize the stream.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_row_types(const char* row_types, i_t size)
+{
+  auto& types = row_types_;
+  types.resize(size, stream_view_);
+  raft::copy(types.data(), row_types, size, stream_view_);
+}
+
+// Store a copy of the objective name.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_objective_name(const std::string& objective_name)
+{
+  objective_name_ = objective_name;
+}
+
+// Store a copy of the problem name.
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_problem_name(const std::string& problem_name)
+{
+  problem_name_ = problem_name;
+}
+
+// Store a copy of the variable names (kept on the host in var_names_).
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_variable_names(
+  const std::vector<std::string>& variable_names)
+{
+  var_names_ = variable_names;
+}
+
+// Store a copy of the row names (kept on the host in row_names_).
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::set_row_names(const std::vector<std::string>& row_names)
+{
+  row_names_ = row_names;
+}
+
+// ==============================================================================
+// Getters (scalars, device vectors, and host-side metadata)
+// ==============================================================================
+
+// Number of variables, as recorded by set_objective_coefficients().
+template <typename i_t, typename f_t>
+i_t gpu_optimization_problem_t<i_t, f_t>::get_n_variables() const
+{
+  return n_vars_;
+}
+
+// Number of constraints, as derived from the offsets size in set_csr_constraint_matrix().
+template <typename i_t, typename f_t>
+i_t gpu_optimization_problem_t<i_t, f_t>::get_n_constraints() const
+{
+  return n_constraints_;
+}
+
+// Number of stored matrix values, i.e. the current element count of A_ converted to i_t.
+template <typename i_t, typename f_t>
+i_t gpu_optimization_problem_t<i_t, f_t>::get_nnz() const
+{
+  return static_cast<i_t>(A_.size());
+}
+
+// Count the variables whose type is var_t::INTEGER.
+// Returns 0 immediately when no types are stored; otherwise the types are copied to the
+// host on stream_view_, the handle's stream is synchronized, and the entries are counted.
+template <typename i_t, typename f_t>
+i_t gpu_optimization_problem_t<i_t, f_t>::get_n_integers() const
+{
+  const auto n_types = variable_types_.size();
+  if (n_types == 0) { return 0; }
+
+  std::vector<var_t> types_on_host(n_types);
+  raft::copy(types_on_host.data(), variable_types_.data(), n_types, stream_view_);
+  handle_ptr_->sync_stream();
+
+  i_t n_integer = 0;
+  for (size_t idx = 0; idx < n_types; ++idx) {
+    if (types_on_host[idx] == var_t::INTEGER) { ++n_integer; }
+  }
+  return n_integer;
+}
+
+// Read-only access to the device vector of constraint matrix values (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
+  const
+{
+  return A_;
+}
+
+// Mutable access to the device vector of constraint matrix values (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values()
+{
+  return A_;
+}
+
+// Read-only access to the device vector of constraint matrix indices (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<i_t>&
+gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices() const
+{
+  return A_indices_;
+}
+
+// Mutable access to the device vector of constraint matrix indices (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices()
+{
+  return A_indices_;
+}
+
+// Read-only access to the device vector of constraint matrix offsets (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<i_t>&
+gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets() const
+{
+  return A_offsets_;
+}
+
+// Mutable access to the device vector of constraint matrix offsets (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets()
+{
+  return A_offsets_;
+}
+
+// Read-only access to the device vector filled by set_constraint_bounds() (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds() const
+{
+  return b_;
+}
+
+// Mutable access to the device vector filled by set_constraint_bounds() (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds()
+{
+  return b_;
+}
+
+// Read-only access to the device vector of objective coefficients (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
+  const
+{
+  return c_;
+}
+
+// Mutable access to the device vector of objective coefficients (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients()
+{
+  return c_;
+}
+
+// Objective scaling factor as stored by set_objective_scaling_factor().
+template <typename i_t, typename f_t>
+f_t gpu_optimization_problem_t<i_t, f_t>::get_objective_scaling_factor() const
+{
+  return objective_scaling_factor_;
+}
+
+// Objective offset as stored by set_objective_offset().
+template <typename i_t, typename f_t>
+f_t gpu_optimization_problem_t<i_t, f_t>::get_objective_offset() const
+{
+  return objective_offset_;
+}
+
+// Read-only access to the device vector of variable lower bounds (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
+  const
+{
+  return variable_lower_bounds_;
+}
+
+// Mutable access to the device vector of variable lower bounds (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds()
+{
+  return variable_lower_bounds_;
+}
+
+// Read-only access to the device vector of variable upper bounds (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
+  const
+{
+  return variable_upper_bounds_;
+}
+
+// Mutable access to the device vector of variable upper bounds (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds()
+{
+  return variable_upper_bounds_;
+}
+
+// Read-only access to the device vector of constraint lower bounds (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
+  const
+{
+  return constraint_lower_bounds_;
+}
+
+// Mutable access to the device vector of constraint lower bounds (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds()
+{
+  return constraint_lower_bounds_;
+}
+
+// Read-only access to the device vector of constraint upper bounds (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
+  const
+{
+  return constraint_upper_bounds_;
+}
+
+// Mutable access to the device vector of constraint upper bounds (no copy is made).
+template <typename i_t, typename f_t>
+rmm::device_uvector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds()
+{
+  return constraint_upper_bounds_;
+}
+
+// Read-only access to the device vector of row type characters (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<char>& gpu_optimization_problem_t<i_t, f_t>::get_row_types() const
+{
+  return row_types_;
+}
+
+// Read-only access to the device vector of variable types (no copy is made).
+template <typename i_t, typename f_t>
+const rmm::device_uvector<var_t>& gpu_optimization_problem_t<i_t, f_t>::get_variable_types() const
+{
+  return variable_types_;
+}
+
+// Optimization-sense flag as stored by set_maximize().
+template <typename i_t, typename f_t>
+bool gpu_optimization_problem_t<i_t, f_t>::get_sense() const
+{
+  return maximize_;
+}
+
+// A problem counts as empty when it has no variables or no constraints.
+template <typename i_t, typename f_t>
+bool gpu_optimization_problem_t<i_t, f_t>::empty() const
+{
+  const bool has_variables   = n_vars_ != 0;
+  const bool has_constraints = n_constraints_ != 0;
+  return !(has_variables && has_constraints);
+}
+
+// Copy of the stored objective name (empty if never set).
+template <typename i_t, typename f_t>
+std::string gpu_optimization_problem_t<i_t, f_t>::get_objective_name() const
+{
+  return objective_name_;
+}
+
+// Copy of the stored problem name (empty if never set).
+template <typename i_t, typename f_t>
+std::string gpu_optimization_problem_t<i_t, f_t>::get_problem_name() const
+{
+  return problem_name_;
+}
+
+// Current problem category; written by set_problem_category() and set_variable_types().
+template <typename i_t, typename f_t>
+problem_category_t gpu_optimization_problem_t<i_t, f_t>::get_problem_category() const
+{
+  return problem_category_;
+}
+
+// Read-only reference to the stored variable names; valid as long as this object lives.
+template <typename i_t, typename f_t>
+const std::vector<std::string>& gpu_optimization_problem_t<i_t, f_t>::get_variable_names() const
+{
+  return var_names_;
+}
+
+// Read-only reference to the stored row names; valid as long as this object lives.
+template <typename i_t, typename f_t>
+const std::vector<std::string>& gpu_optimization_problem_t<i_t, f_t>::get_row_names() const
+{
+  return row_names_;
+}
+
+// Read-only reference to the host-side offsets of the quadratic objective matrix.
+template <typename i_t, typename f_t>
+const std::vector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_offsets()
+  const
+{
+  return Q_offsets_;
+}
+
+// Read-only reference to the host-side indices of the quadratic objective matrix.
+template <typename i_t, typename f_t>
+const std::vector<i_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_indices()
+  const
+{
+  return Q_indices_;
+}
+
+// Read-only reference to the host-side values of the quadratic objective matrix.
+template <typename i_t, typename f_t>
+const std::vector<f_t>& gpu_optimization_problem_t<i_t, f_t>::get_quadratic_objective_values() const
+{
+  return Q_values_;
+}
+
+// True when at least one quadratic objective value has been stored.
+template <typename i_t, typename f_t>
+bool gpu_optimization_problem_t<i_t, f_t>::has_quadratic_objective() const
+{
+  return Q_values_.size() > 0;
+}
+
+// The raft handle pointer passed to the constructor; this object does not delete it.
+template <typename i_t, typename f_t>
+raft::handle_t const* gpu_optimization_problem_t<i_t, f_t>::get_handle_ptr() const noexcept
+{
+  return handle_ptr_;
+}
+
+// ==============================================================================
+// Host Getters (copy from GPU to CPU)
+// ==============================================================================
+
+// Return a host copy of the constraint matrix values.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_values_host() const
+{
+  const auto count = A_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), A_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the constraint matrix indices.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<i_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_indices_host() const
+{
+  const auto count = A_indices_.size();
+  std::vector<i_t> result(count);
+  raft::copy(result.data(), A_indices_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the constraint matrix offsets.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<i_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_matrix_offsets_host() const
+{
+  const auto count = A_offsets_.size();
+  std::vector<i_t> result(count);
+  raft::copy(result.data(), A_offsets_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the constraint bounds vector b_.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_bounds_host() const
+{
+  const auto count = b_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), b_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the objective coefficients.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_objective_coefficients_host() const
+{
+  const auto count = c_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), c_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the variable lower bounds.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_lower_bounds_host() const
+{
+  const auto count = variable_lower_bounds_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), variable_lower_bounds_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the variable upper bounds.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_upper_bounds_host() const
+{
+  const auto count = variable_upper_bounds_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), variable_upper_bounds_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the constraint lower bounds.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_lower_bounds_host() const
+{
+  const auto count = constraint_lower_bounds_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), constraint_lower_bounds_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the constraint upper bounds.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<f_t> gpu_optimization_problem_t<i_t, f_t>::get_constraint_upper_bounds_host() const
+{
+  const auto count = constraint_upper_bounds_.size();
+  std::vector<f_t> result(count);
+  raft::copy(result.data(), constraint_upper_bounds_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the row type characters.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<char> gpu_optimization_problem_t<i_t, f_t>::get_row_types_host() const
+{
+  const auto count = row_types_.size();
+  std::vector<char> result(count);
+  raft::copy(result.data(), row_types_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// Return a host copy of the variable types.
+// The copy runs on stream_view_ and the handle's stream is synchronized before returning.
+template <typename i_t, typename f_t>
+std::vector<var_t> gpu_optimization_problem_t<i_t, f_t>::get_variable_types_host() const
+{
+  const auto count = variable_types_.size();
+  std::vector<var_t> result(count);
+  raft::copy(result.data(), variable_types_.data(), count, stream_view_);
+  handle_ptr_->sync_stream();
+  return result;
+}
+
+// ==============================================================================
+// Conversion to optimization_problem_t
+// ==============================================================================
+
+// Build an optimization_problem_t on the same handle from this object's data.
+// Scalars, names and the quadratic objective are copied; every non-empty device vector is
+// handed over with std::move, so this call is destructive for the device data held here.
+// NOTE(review): n_vars_ and n_constraints_ are not modified by this function, so the scalar
+// getters keep reporting the previous dimensions after the vectors have been moved out.
+// NOTE(review): the order of the setter calls is kept as written; whether the target class
+// depends on it (e.g. category set before types) cannot be told from here.
+template <typename i_t, typename f_t>
+optimization_problem_t<i_t, f_t> gpu_optimization_problem_t<i_t, f_t>::to_optimization_problem()
+{
+  optimization_problem_t<i_t, f_t> problem(handle_ptr_);
+
+  // Set scalar values
+  problem.set_maximize(maximize_);
+  problem.set_objective_scaling_factor(objective_scaling_factor_);
+  problem.set_objective_offset(objective_offset_);
+  problem.set_problem_category(problem_category_);
+
+  // Set string values (copy is acceptable for small strings)
+  if (!objective_name_.empty()) problem.set_objective_name(objective_name_);
+  if (!problem_name_.empty()) problem.set_problem_name(problem_name_);
+  if (!var_names_.empty()) problem.set_variable_names(var_names_);
+  if (!row_names_.empty()) problem.set_row_names(row_names_);
+
+  // MOVE all device vectors (zero-copy transfer of ownership)
+  // This avoids expensive GPU-to-GPU memory copies
+
+  // Move CSR constraint matrix
+  // NOTE(review): the guard looks at A_ only; a matrix that has row offsets but no stored
+  // values would not be transferred - confirm this is intended.
+  if (A_.size() > 0) {
+    problem.set_csr_constraint_matrix_move(
+      std::move(A_), std::move(A_indices_), std::move(A_offsets_));
+  }
+
+  // Move constraint bounds
+  if (b_.size() > 0) { problem.set_constraint_bounds_move(std::move(b_)); }
+
+  // Move objective coefficients
+  if (c_.size() > 0) { problem.set_objective_coefficients_move(std::move(c_)); }
+
+  // Set quadratic objective if present (stored in std::vector, not device_uvector)
+  // The host vectors are passed by pointer and remain populated in this object.
+  if (!Q_values_.empty()) {
+    problem.set_quadratic_objective_matrix(Q_values_.data(),
+                                           Q_values_.size(),
+                                           Q_indices_.data(),
+                                           Q_indices_.size(),
+                                           Q_offsets_.data(),
+                                           Q_offsets_.size());
+  }
+
+  // Move variable bounds
+  if (variable_lower_bounds_.size() > 0) {
+    problem.set_variable_lower_bounds_move(std::move(variable_lower_bounds_));
+  }
+  if (variable_upper_bounds_.size() > 0) {
+    problem.set_variable_upper_bounds_move(std::move(variable_upper_bounds_));
+  }
+
+  // Move variable types
+  if (variable_types_.size() > 0) { problem.set_variable_types_move(std::move(variable_types_)); }
+
+  // Move constraint bounds
+  if (constraint_lower_bounds_.size() > 0) {
+    problem.set_constraint_lower_bounds_move(std::move(constraint_lower_bounds_));
+  }
+  if (constraint_upper_bounds_.size() > 0) {
+    problem.set_constraint_upper_bounds_move(std::move(constraint_upper_bounds_));
+  }
+
+  // Move row types
+  if (row_types_.size() > 0) { problem.set_row_types_move(std::move(row_types_)); }
+
+  // NOTE: After this return, the gpu_optimization_problem_t is left in a valid
+  // but empty state (all device_uvectors are moved from). This is intentional
+  // as we're transferring ownership to optimization_problem_t.
+
+  return problem;
+}
+
+// ==============================================================================
+// File I/O
+// ==============================================================================
+
+/**
+ * @brief Write the problem to an MPS file.
+ *
+ * All device data is first copied to host vectors through the *_host() getters. A
+ * data_model_view_t is then pointed at those vectors and passed to
+ * cuopt::mps_parser::write_mps. Every host vector is declared at function scope so that it
+ * outlives the view, which refers to the data instead of copying it.
+ *
+ * Arrays that are empty are not registered with the view. Variable lower and upper bounds
+ * are registered independently of each other; constraint lower/upper bounds are registered
+ * only when both are present.
+ *
+ * @param mps_file_path Destination path handed to write_mps
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_path)
+{
+  // Use the existing host getters to get data, then write to MPS
+  cuopt::mps_parser::data_model_view_t<i_t, f_t> data_model_view;
+
+  // Set optimization sense
+  data_model_view.set_maximize(get_sense());
+
+  // Copy to host using host getters
+  auto constraint_matrix_values  = get_constraint_matrix_values_host();
+  auto constraint_matrix_indices = get_constraint_matrix_indices_host();
+  auto constraint_matrix_offsets = get_constraint_matrix_offsets_host();
+  auto constraint_bounds         = get_constraint_bounds_host();
+  auto objective_coefficients    = get_objective_coefficients_host();
+  auto variable_lower_bounds     = get_variable_lower_bounds_host();
+  auto variable_upper_bounds     = get_variable_upper_bounds_host();
+  auto constraint_lower_bounds   = get_constraint_lower_bounds_host();
+  auto constraint_upper_bounds   = get_constraint_upper_bounds_host();
+  auto row_types                 = get_row_types_host();
+
+  // Set constraint matrix in CSR format
+  if (!constraint_matrix_values.empty()) {
+    data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(),
+                                              constraint_matrix_values.size(),
+                                              constraint_matrix_indices.data(),
+                                              constraint_matrix_indices.size(),
+                                              constraint_matrix_offsets.data(),
+                                              constraint_matrix_offsets.size());
+  }
+
+  // Set constraint bounds (RHS)
+  if (!constraint_bounds.empty()) {
+    data_model_view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size());
+  }
+
+  // Set objective coefficients
+  if (!objective_coefficients.empty()) {
+    data_model_view.set_objective_coefficients(objective_coefficients.data(),
+                                               objective_coefficients.size());
+  }
+
+  // Set objective scaling and offset
+  data_model_view.set_objective_scaling_factor(objective_scaling_factor_);
+  data_model_view.set_objective_offset(objective_offset_);
+
+  // Set variable bounds. Each side is checked on its own so that upper bounds are not
+  // dropped when only they were provided, and an empty buffer is never registered.
+  if (!variable_lower_bounds.empty()) {
+    data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(),
+                                              variable_lower_bounds.size());
+  }
+  if (!variable_upper_bounds.empty()) {
+    data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(),
+                                              variable_upper_bounds.size());
+  }
+
+  // Set row types (constraint types)
+  if (!row_types.empty()) { data_model_view.set_row_types(row_types.data(), row_types.size()); }
+
+  // Set constraint bounds (lower and upper)
+  if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) {
+    data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(),
+                                                constraint_lower_bounds.size());
+    data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(),
+                                                constraint_upper_bounds.size());
+  }
+
+  // Set problem and variable names FIRST (before variable types)
+  if (!problem_name_.empty()) { data_model_view.set_problem_name(problem_name_); }
+  if (!objective_name_.empty()) { data_model_view.set_objective_name(objective_name_); }
+  if (!var_names_.empty()) { data_model_view.set_variable_names(var_names_); }
+  if (!row_names_.empty()) { data_model_view.set_row_names(row_names_); }
+
+  // Set variable types AFTER names (convert from enum to char)
+  // CRITICAL: Declare variable_types OUTSIDE the if block so it stays alive
+  // until after write_mps() is called, since data_model_view stores a span (pointer) to it
+  std::vector<char> variable_types;
+  if (n_vars_ > 0) {
+    const auto enum_variable_types = get_variable_types_host();
+    variable_types.reserve(enum_variable_types.size());
+    for (const auto type : enum_variable_types) {
+      variable_types.push_back(type == var_t::INTEGER ? 'I' : 'C');
+    }
+
+    // Variables can exist without any stored types (set_variable_types() never called);
+    // in that case there is nothing to register and no empty buffer is passed on.
+    if (!variable_types.empty()) {
+      data_model_view.set_variable_types(variable_types.data(), variable_types.size());
+    }
+  }
+
+  cuopt::mps_parser::write_mps(data_model_view, mps_file_path);
+}
+
+// ==============================================================================
+// Comparison
+// ==============================================================================
+
+/**
+ * @brief Name-based equivalence test between this problem and another one.
+ *
+ * Variables and rows are matched by name, so both problems may list them in a different order.
+ * Scalars and variable types are compared exactly, objective/bound arrays with an absolute
+ * tolerance of 1e-9. Constraint-matrix entries are NOT compared, only the number of values.
+ *
+ * @param other Problem to compare against.
+ * @return true when every checked property matches; false on any mismatch, or when either
+ *         problem lacks variable or row names.
+ */
+template <typename i_t, typename f_t>
+bool gpu_optimization_problem_t<i_t, f_t>::is_equivalent(
+  const optimization_problem_interface_t<i_t, f_t>& other) const
+{
+  // Scalar properties must match exactly.
+  if (maximize_ != other.get_sense()) return false;
+  if (n_vars_ != other.get_n_variables()) return false;
+  if (n_constraints_ != other.get_n_constraints()) return false;
+  if (objective_scaling_factor_ != other.get_objective_scaling_factor()) return false;
+  if (objective_offset_ != other.get_objective_offset()) return false;
+  if (problem_category_ != other.get_problem_category()) return false;
+
+  // Matching is name-driven: each side must carry exactly one name per variable and per row,
+  // otherwise indexing the name vectors by variable/row number could run past their end.
+  const auto& this_var_names  = get_variable_names();
+  const auto& other_var_names = other.get_variable_names();
+  const auto& this_row_names  = get_row_names();
+  const auto& other_row_names = other.get_row_names();
+  const auto n_vars           = static_cast<size_t>(n_vars_);
+  const auto n_rows           = static_cast<size_t>(n_constraints_);
+  if (this_var_names.empty() || other_var_names.empty()) return false;
+  if (this_row_names.empty() || other_row_names.empty()) return false;
+  if (this_var_names.size() != n_vars || other_var_names.size() != n_vars) return false;
+  if (this_row_names.size() != n_rows || other_row_names.size() != n_rows) return false;
+
+  // Fills perm[i] with the position in `theirs` of the name mine[i]. Fails on an unknown name
+  // or when two entries of `mine` resolve to the same position (duplicate names).
+  auto build_perm = [](const auto& mine, const auto& theirs, std::vector<i_t>& perm) {
+    std::unordered_map<std::string, i_t> index_of;
+    for (size_t j = 0; j < theirs.size(); ++j) {
+      index_of[theirs[j]] = static_cast<i_t>(j);
+    }
+    std::vector<bool> taken(theirs.size(), false);
+    perm.resize(mine.size());
+    for (size_t i = 0; i < mine.size(); ++i) {
+      auto it = index_of.find(mine[i]);
+      if (it == index_of.end() || taken[it->second]) return false;
+      taken[it->second] = true;
+      perm[i]           = it->second;
+    }
+    return true;
+  };
+
+  std::vector<i_t> var_perm;
+  std::vector<i_t> row_perm;
+  if (!build_perm(this_var_names, other_var_names, var_perm)) return false;
+  if (!build_perm(this_row_names, other_row_names, row_perm)) return false;
+
+  // Compares two host arrays through `perm` (absolute tolerance 1e-9). Arrays that are empty on
+  // both sides count as equal; a populated array must hold exactly one entry per permuted
+  // index so that no element access can go out of bounds.
+  auto close_under = [](const auto& mine, const auto& theirs, const std::vector<i_t>& perm) {
+    if (mine.size() != theirs.size()) return false;
+    if (mine.size() == 0) return true;
+    if (mine.size() != perm.size()) return false;
+    for (size_t i = 0; i < perm.size(); ++i) {
+      if (std::abs(mine[i] - theirs[perm[i]]) > 1e-9) return false;
+    }
+    return true;
+  };
+
+  // Variable-indexed data.
+  auto this_c  = get_objective_coefficients_host();
+  auto other_c = other.get_objective_coefficients_host();
+  if (!close_under(this_c, other_c, var_perm)) return false;
+
+  auto this_var_lb  = get_variable_lower_bounds_host();
+  auto other_var_lb = other.get_variable_lower_bounds_host();
+  if (!close_under(this_var_lb, other_var_lb, var_perm)) return false;
+
+  auto this_var_ub  = get_variable_upper_bounds_host();
+  auto other_var_ub = other.get_variable_upper_bounds_host();
+  if (!close_under(this_var_ub, other_var_ub, var_perm)) return false;
+
+  // Variable types are enum values, so they are compared for exact equality.
+  auto this_var_types  = get_variable_types_host();
+  auto other_var_types = other.get_variable_types_host();
+  if (this_var_types.size() != other_var_types.size()) return false;
+  if (this_var_types.size() != 0 && this_var_types.size() != n_vars) return false;
+  for (size_t i = 0; i < this_var_types.size(); ++i) {
+    if (this_var_types[i] != other_var_types[var_perm[i]]) return false;
+  }
+
+  // Constraint-indexed data.
+  auto this_b  = get_constraint_bounds_host();
+  auto other_b = other.get_constraint_bounds_host();
+  if (!close_under(this_b, other_b, row_perm)) return false;
+
+  // CSR entries are not compared yet (that would need row/column permutations of the matrix);
+  // only the number of stored values is checked.
+  auto this_A_values  = get_constraint_matrix_values_host();
+  auto other_A_values = other.get_constraint_matrix_values_host();
+  return this_A_values.size() == other_A_values.size();
+}
+
+// ==============================================================================
+// Remote Execution (Polymorphic Dispatch)
+// ==============================================================================
+
+/**
+ * @brief Remote LP solve entry point for a GPU-backed problem.
+ *
+ * Delegates to the free function ::cuopt::linear_programming::solve_lp_remote(), passing this
+ * object as a non-const reference.
+ *
+ * @param settings PDLP solver settings forwarded unchanged.
+ * @return Whatever the free-function overload returns.
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>>
+gpu_optimization_problem_t<i_t, f_t>::solve_lp_remote(
+  pdlp_solver_settings_t<i_t, f_t> const& settings) const
+{
+  // This member is const while the callee takes the problem by non-const reference, so
+  // constness is stripped before delegating.
+  using self_t       = gpu_optimization_problem_t<i_t, f_t>;
+  auto* mutable_self = const_cast<self_t*>(this);
+  return ::cuopt::linear_programming::solve_lp_remote(*mutable_self, settings);
+}
+
+/**
+ * @brief Remote MIP solve entry point for a GPU-backed problem.
+ *
+ * Delegates to the free function ::cuopt::linear_programming::solve_mip_remote(), passing this
+ * object as a non-const reference.
+ *
+ * @param settings MIP solver settings forwarded unchanged.
+ * @return Whatever the free-function overload returns.
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>>
+gpu_optimization_problem_t<i_t, f_t>::solve_mip_remote(
+  mip_solver_settings_t<i_t, f_t> const& settings) const
+{
+  // Same const-stripping as the LP variant: the callee is handed a non-const reference.
+  using self_t       = gpu_optimization_problem_t<i_t, f_t>;
+  auto* mutable_self = const_cast<self_t*>(this);
+  return ::cuopt::linear_programming::solve_mip_remote(*mutable_self, settings);
+}
+
+// ==============================================================================
+// C API Support: Copy to Host (GPU Implementation)
+// ==============================================================================
+
+/**
+ * @brief Copy objective coefficients from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_objective_coefficients_to_host(f_t* output,
+                                                                               i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(cudaMemcpy(output, c_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the three constraint-matrix arrays from device memory into host buffers.
+ *
+ * @param values      Destination for `num_values` matrix values.
+ * @param indices     Destination for `num_indices` index entries.
+ * @param offsets     Destination for `num_offsets` offset entries.
+ * @param num_values  Element count for `values`.
+ * @param num_indices Element count for `indices`.
+ * @param num_offsets Element count for `offsets`.
+ *
+ * None of the counts is checked against the length of the stored device vectors.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_matrix_to_host(
+  f_t* values, i_t* indices, i_t* offsets, i_t num_values, i_t num_indices, i_t num_offsets) const
+{
+  const auto values_bytes  = num_values * sizeof(f_t);
+  const auto indices_bytes = num_indices * sizeof(i_t);
+  const auto offsets_bytes = num_offsets * sizeof(i_t);
+
+  RAFT_CUDA_TRY(cudaMemcpy(values, A_.data(), values_bytes, cudaMemcpyDeviceToHost));
+  RAFT_CUDA_TRY(cudaMemcpy(indices, A_indices_.data(), indices_bytes, cudaMemcpyDeviceToHost));
+  RAFT_CUDA_TRY(cudaMemcpy(offsets, A_offsets_.data(), offsets_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the row-type characters from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` chars.
+ * @param size   Number of chars to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_row_types_to_host(char* output, i_t size) const
+{
+  const auto n_bytes = size * sizeof(char);
+  RAFT_CUDA_TRY(cudaMemcpy(output, row_types_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the constraint bounds (b) from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_bounds_to_host(f_t* output,
+                                                                          i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(cudaMemcpy(output, b_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the constraint lower bounds from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_lower_bounds_to_host(f_t* output,
+                                                                                i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(
+    cudaMemcpy(output, constraint_lower_bounds_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the constraint upper bounds from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_constraint_upper_bounds_to_host(f_t* output,
+                                                                                i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(
+    cudaMemcpy(output, constraint_upper_bounds_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the variable lower bounds from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_variable_lower_bounds_to_host(f_t* output,
+                                                                              i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(cudaMemcpy(output, variable_lower_bounds_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the variable upper bounds from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type f_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_variable_upper_bounds_to_host(f_t* output,
+                                                                              i_t size) const
+{
+  const auto n_bytes = size * sizeof(f_t);
+  RAFT_CUDA_TRY(cudaMemcpy(output, variable_upper_bounds_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+/**
+ * @brief Copy the variable types from device memory into a host buffer.
+ *
+ * @param output Destination pointer; receives `size` values of type var_t.
+ * @param size   Number of values to copy; not checked against the stored vector length.
+ */
+template <typename i_t, typename f_t>
+void gpu_optimization_problem_t<i_t, f_t>::copy_variable_types_to_host(var_t* output,
+                                                                       i_t size) const
+{
+  const auto n_bytes = size * sizeof(var_t);
+  RAFT_CUDA_TRY(cudaMemcpy(output, variable_types_.data(), n_bytes, cudaMemcpyDeviceToHost));
+}
+
+// ==============================================================================
+// Template instantiations
+// ==============================================================================
+// Explicit template instantiations matching optimization_problem_t
+// Each floating-point precision is instantiated only when its MIP_INSTANTIATE_* macro
+// evaluates to true; the index type is int32_t in both cases.
+#if MIP_INSTANTIATE_FLOAT
+template class gpu_optimization_problem_t<int32_t, float>;
+#endif
+#if MIP_INSTANTIATE_DOUBLE
+template class gpu_optimization_problem_t<int32_t, double>;
+#endif
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu
index ba57141e9..1f633985f 100644
--- a/cpp/src/linear_programming/optimization_problem.cu
+++ b/cpp/src/linear_programming/optimization_problem.cu
@@ -613,6 +613,80 @@ void optimization_problem_t<i_t, f_t>::set_row_names(const std::vector<std::stri
   row_names_ = row_names;
 }
 
+// ============================================================================
+// Move-based setters (zero-copy, transfers ownership)
+// ============================================================================
+
+/**
+ * @brief Take ownership of a CSR constraint matrix held in rmm device vectors.
+ *
+ * The three vectors are moved into the problem (no element copy) and the number of
+ * constraints is derived from the offsets vector as its length minus one.
+ *
+ * @param A_values  Matrix values.
+ * @param A_indices Index entry for each value.
+ * @param A_offsets Row offsets; an empty vector yields zero constraints.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_csr_constraint_matrix_move(
+  rmm::device_uvector<f_t>&& A_values,
+  rmm::device_uvector<i_t>&& A_indices,
+  rmm::device_uvector<i_t>&& A_offsets)
+{
+  A_         = std::move(A_values);
+  A_indices_ = std::move(A_indices);
+  A_offsets_ = std::move(A_offsets);
+
+  // size() is unsigned: subtracting 1 from an empty offsets vector would wrap around instead
+  // of giving a meaningful row count, so the empty case is handled explicitly.
+  const auto n_offsets = A_offsets_.size();
+  n_constraints_       = n_offsets > 0 ? static_cast<i_t>(n_offsets - 1) : i_t{0};
+}
+
+/**
+ * @brief Take ownership of the device vector holding the constraint bounds (b).
+ *
+ * @param b Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_constraint_bounds_move(rmm::device_uvector<f_t>&& b)
+{
+  b_ = std::move(b);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the objective coefficients (c).
+ *
+ * @param c Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_objective_coefficients_move(rmm::device_uvector<f_t>&& c)
+{
+  c_      = std::move(c);
+  // The variable count follows the length of the coefficient vector that was just adopted.
+  n_vars_ = c_.size();
+}
+
+/**
+ * @brief Take ownership of the device vector holding the variable lower bounds.
+ *
+ * @param variable_lower_bounds Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_variable_lower_bounds_move(
+  rmm::device_uvector<f_t>&& variable_lower_bounds)
+{
+  variable_lower_bounds_ = std::move(variable_lower_bounds);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the variable upper bounds.
+ *
+ * @param variable_upper_bounds Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_variable_upper_bounds_move(
+  rmm::device_uvector<f_t>&& variable_upper_bounds)
+{
+  variable_upper_bounds_ = std::move(variable_upper_bounds);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the variable types.
+ *
+ * @param variable_types Device vector of var_t moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_variable_types_move(
+  rmm::device_uvector<var_t>&& variable_types)
+{
+  variable_types_ = std::move(variable_types);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the constraint lower bounds.
+ *
+ * @param constraint_lower_bounds Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_constraint_lower_bounds_move(
+  rmm::device_uvector<f_t>&& constraint_lower_bounds)
+{
+  constraint_lower_bounds_ = std::move(constraint_lower_bounds);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the constraint upper bounds.
+ *
+ * @param constraint_upper_bounds Device vector moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_constraint_upper_bounds_move(
+  rmm::device_uvector<f_t>&& constraint_upper_bounds)
+{
+  constraint_upper_bounds_ = std::move(constraint_upper_bounds);
+}
+
+/**
+ * @brief Take ownership of the device vector holding the row-type characters.
+ *
+ * @param row_types Device vector of char moved into the problem; no element copy is made.
+ */
+template <typename i_t, typename f_t>
+void optimization_problem_t<i_t, f_t>::set_row_types_move(rmm::device_uvector<char>&& row_types)
+{
+  row_types_ = std::move(row_types);
+}
+
+// ============================================================================
+// Getters
+// ============================================================================
+
 template <typename i_t, typename f_t>
 i_t optimization_problem_t<i_t, f_t>::get_n_variables() const
 {
diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu
index 8a05f1b2a..4d2be033e 100644
--- a/cpp/src/linear_programming/pdlp.cu
+++ b/cpp/src/linear_programming/pdlp.cu
@@ -173,7 +173,7 @@ pdlp_solver_t<i_t, f_t>::pdlp_solver_t(problem_t<i_t, f_t>& op_problem,
     set_initial_dual_solution(dual_sol);
   }
 
-  if (settings_.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.size() != 0) {
+  if (settings_.get_pdlp_warm_start_data().is_populated()) {
     cuopt_expects(
       !batch_mode_, error_type_t::ValidationError, "Batch mode not supported for warm start");
     cuopt_expects(settings_.pdlp_solver_mode == pdlp_solver_mode_t::Stable2,
@@ -2204,8 +2204,7 @@ optimization_problem_solution_t<i_t, f_t> pdlp_solver_t<i_t, f_t>::run_solver(co
     "Initial primal_weight", primal_weight_.data(), primal_weight_.size(), std::cout);
 #endif
 
-  bool warm_start_was_given =
-    settings_.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.size() != 0;
+  bool warm_start_was_given = settings_.get_pdlp_warm_start_data().is_populated();
 
   // In batch mode, before running the solver, we need to transpose the primal and dual solution to
   // row format
diff --git a/cpp/src/linear_programming/solution_conversion.cu b/cpp/src/linear_programming/solution_conversion.cu
new file mode 100644
index 000000000..1680102da
--- /dev/null
+++ b/cpp/src/linear_programming/solution_conversion.cu
@@ -0,0 +1,236 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+/**
+ * @file solution_conversion.cu
+ * @brief Implementations of conversion methods from solution classes to Cython ret structs
+ */
+
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/utilities/cython_solve.hpp>
+
+#include <rmm/device_buffer.hpp>
+#include <rmm/device_uvector.hpp>
+
+namespace cuopt::linear_programming {
+
+// ===========================
+// GPU LP Solution Conversion
+// ===========================
+
+/**
+ * @brief Consume this GPU LP solution and repackage it as the Cython return struct.
+ *
+ * Device vectors are released into rmm::device_buffer objects (ownership transfer, no copy).
+ * Warm start buffers are transferred only when the warm start data's current primal solution
+ * is non-empty; otherwise empty buffers and zeroed scalars are returned.
+ *
+ * @return Struct holding the solution buffers, warm start data and termination statistics.
+ */
+template <typename i_t, typename f_t>
+cuopt::cython::linear_programming_ret_t
+gpu_lp_solution_t<i_t, f_t>::to_linear_programming_ret_t() &&
+{
+  // Hands a device vector's storage over to a heap-allocated rmm::device_buffer.
+  auto take = [](auto&& device_vec) {
+    return std::make_unique<rmm::device_buffer>(std::move(device_vec).release());
+  };
+  // Placeholder used when there is nothing to hand over.
+  auto no_data = []() { return std::make_unique<rmm::device_buffer>(); };
+
+  cuopt::cython::linear_programming_ret_t out;
+
+  // Primal/dual solution and reduced costs.
+  out.primal_solution_ = take(solution_.get_primal_solution());
+  out.dual_solution_   = take(solution_.get_dual_solution());
+  out.reduced_cost_    = take(solution_.get_reduced_cost());
+
+  // PDLP warm start data.
+  auto& warm                = solution_.get_pdlp_warm_start_data();
+  const bool has_warm_start = warm.current_primal_solution_.size() > 0;
+  if (!has_warm_start) {
+    // Nothing recorded: empty buffers and default scalars.
+    out.current_primal_solution_                  = no_data();
+    out.current_dual_solution_                    = no_data();
+    out.initial_primal_average_                   = no_data();
+    out.initial_dual_average_                     = no_data();
+    out.current_ATY_                              = no_data();
+    out.sum_primal_solutions_                     = no_data();
+    out.sum_dual_solutions_                       = no_data();
+    out.last_restart_duality_gap_primal_solution_ = no_data();
+    out.last_restart_duality_gap_dual_solution_   = no_data();
+
+    out.initial_primal_weight_         = 0.0;
+    out.initial_step_size_             = 0.0;
+    out.total_pdlp_iterations_         = 0;
+    out.total_pdhg_iterations_         = 0;
+    out.last_candidate_kkt_score_      = 0.0;
+    out.last_restart_kkt_score_        = 0.0;
+    out.sum_solution_weight_           = 0.0;
+    out.iterations_since_last_restart_ = 0;
+  } else {
+    // Vector-valued warm start state.
+    out.current_primal_solution_ = take(warm.current_primal_solution_);
+    out.current_dual_solution_   = take(warm.current_dual_solution_);
+    out.initial_primal_average_  = take(warm.initial_primal_average_);
+    out.initial_dual_average_    = take(warm.initial_dual_average_);
+    out.current_ATY_             = take(warm.current_ATY_);
+    out.sum_primal_solutions_    = take(warm.sum_primal_solutions_);
+    out.sum_dual_solutions_      = take(warm.sum_dual_solutions_);
+    out.last_restart_duality_gap_primal_solution_ =
+      take(warm.last_restart_duality_gap_primal_solution_);
+    out.last_restart_duality_gap_dual_solution_ =
+      take(warm.last_restart_duality_gap_dual_solution_);
+
+    // Scalar warm start state.
+    out.initial_primal_weight_         = warm.initial_primal_weight_;
+    out.initial_step_size_             = warm.initial_step_size_;
+    out.total_pdlp_iterations_         = warm.total_pdlp_iterations_;
+    out.total_pdhg_iterations_         = warm.total_pdhg_iterations_;
+    out.last_candidate_kkt_score_      = warm.last_candidate_kkt_score_;
+    out.last_restart_kkt_score_        = warm.last_restart_kkt_score_;
+    out.sum_solution_weight_           = warm.sum_solution_weight_;
+    out.iterations_since_last_restart_ = warm.iterations_since_last_restart_;
+  }
+
+  // Status and termination statistics, read from entry 0 of the solution.
+  auto stats              = solution_.get_additional_termination_information(0);
+  out.termination_status_ = solution_.get_termination_status(0);
+  out.error_status_       = solution_.get_error_status().get_error_type();
+  out.error_message_      = std::string(solution_.get_error_status().what());
+  out.l2_primal_residual_ = stats.l2_primal_residual;
+  out.l2_dual_residual_   = stats.l2_dual_residual;
+  out.primal_objective_   = stats.primal_objective;
+  out.dual_objective_     = stats.dual_objective;
+  out.gap_                = stats.gap;
+  out.nb_iterations_      = stats.number_of_steps_taken;
+  out.solve_time_         = stats.solve_time;
+  out.solved_by_pdlp_     = stats.solved_by_pdlp;
+
+  return out;
+}
+
+// ===========================
+// GPU MIP Solution Conversion
+// ===========================
+
+/**
+ * @brief Consume this GPU MIP solution and repackage it as the Cython return struct.
+ *
+ * The solution vector is released into an rmm::device_buffer (ownership transfer); all other
+ * fields are copied from the wrapped solution's getters.
+ *
+ * @return Struct holding the solution buffer and the solve statistics.
+ */
+template <typename i_t, typename f_t>
+cuopt::cython::mip_ret_t gpu_mip_solution_t<i_t, f_t>::to_mip_ret_t() &&
+{
+  cuopt::cython::mip_ret_t out;
+
+  // Solution vector: its storage is handed over to a heap-allocated device_buffer.
+  auto&& device_solution = solution_.get_solution();
+  out.solution_ = std::make_unique<rmm::device_buffer>(std::move(device_solution).release());
+
+  // Status information.
+  out.termination_status_ = solution_.get_termination_status();
+  out.error_status_       = solution_.get_error_status().get_error_type();
+  out.error_message_      = std::string(solution_.get_error_status().what());
+
+  // Objective, bound and timing.
+  out.objective_        = solution_.get_objective_value();
+  out.mip_gap_          = solution_.get_mip_gap();
+  out.solution_bound_   = solution_.get_solution_bound();
+  out.total_solve_time_ = solution_.get_total_solve_time();
+  out.presolve_time_    = solution_.get_presolve_time();
+
+  // Violation measures and search counters.
+  out.max_constraint_violation_     = solution_.get_max_constraint_violation();
+  out.max_int_violation_            = solution_.get_max_int_violation();
+  out.max_variable_bound_violation_ = solution_.get_max_variable_bound_violation();
+  out.nodes_                        = solution_.get_num_nodes();
+  out.simplex_iterations_           = solution_.get_num_simplex_iterations();
+
+  return out;
+}
+
+// ===========================
+// CPU LP Solution Conversion
+// ===========================
+
+/**
+ * @brief Consume this CPU LP solution and repackage it as the CPU Cython return struct.
+ *
+ * Vector members (solution and warm start data) are moved into the result; scalar members
+ * are copied.
+ *
+ * @return Struct holding the solution vectors, warm start data and termination statistics.
+ */
+template <typename i_t, typename f_t>
+cuopt::cython::cpu_linear_programming_ret_t
+cpu_lp_solution_t<i_t, f_t>::to_cpu_linear_programming_ret_t() &&
+{
+  cuopt::cython::cpu_linear_programming_ret_t out;
+
+  // Primal/dual solution and reduced costs.
+  out.primal_solution_ = std::move(primal_solution_);
+  out.dual_solution_   = std::move(dual_solution_);
+  out.reduced_cost_    = std::move(reduced_cost_);
+
+  // Vector-valued warm start state, taken from the embedded warm start struct.
+  auto& warm                   = pdlp_warm_start_data_;
+  out.current_primal_solution_ = std::move(warm.current_primal_solution_);
+  out.current_dual_solution_   = std::move(warm.current_dual_solution_);
+  out.initial_primal_average_  = std::move(warm.initial_primal_average_);
+  out.initial_dual_average_    = std::move(warm.initial_dual_average_);
+  out.current_ATY_             = std::move(warm.current_ATY_);
+  out.sum_primal_solutions_    = std::move(warm.sum_primal_solutions_);
+  out.sum_dual_solutions_      = std::move(warm.sum_dual_solutions_);
+  out.last_restart_duality_gap_primal_solution_ =
+    std::move(warm.last_restart_duality_gap_primal_solution_);
+  out.last_restart_duality_gap_dual_solution_ =
+    std::move(warm.last_restart_duality_gap_dual_solution_);
+
+  // Scalar warm start state.
+  out.initial_primal_weight_         = warm.initial_primal_weight_;
+  out.initial_step_size_             = warm.initial_step_size_;
+  out.total_pdlp_iterations_         = warm.total_pdlp_iterations_;
+  out.total_pdhg_iterations_         = warm.total_pdhg_iterations_;
+  out.last_candidate_kkt_score_      = warm.last_candidate_kkt_score_;
+  out.last_restart_kkt_score_        = warm.last_restart_kkt_score_;
+  out.sum_solution_weight_           = warm.sum_solution_weight_;
+  out.iterations_since_last_restart_ = warm.iterations_since_last_restart_;
+
+  // Status information.
+  out.termination_status_ = termination_status_;
+  out.error_status_       = error_status_.get_error_type();
+  out.error_message_      = std::string(error_status_.what());
+
+  // Termination statistics.
+  out.l2_primal_residual_ = l2_primal_residual_;
+  out.l2_dual_residual_   = l2_dual_residual_;
+  out.primal_objective_   = primal_objective_;
+  out.dual_objective_     = dual_objective_;
+  out.gap_                = gap_;
+  out.nb_iterations_      = num_iterations_;
+  out.solve_time_         = solve_time_;
+  out.solved_by_pdlp_     = solved_by_pdlp_;
+
+  return out;
+}
+
+// ===========================
+// CPU MIP Solution Conversion
+// ===========================
+
+/**
+ * @brief Consume this CPU MIP solution and repackage it as the CPU Cython return struct.
+ *
+ * The solution member is moved into the result; every other member is copied.
+ *
+ * @return Struct holding the solution and the solve statistics.
+ */
+template <typename i_t, typename f_t>
+cuopt::cython::cpu_mip_ret_t cpu_mip_solution_t<i_t, f_t>::to_cpu_mip_ret_t() &&
+{
+  cuopt::cython::cpu_mip_ret_t out;
+
+  // Status information.
+  out.termination_status_ = termination_status_;
+  out.error_status_       = error_status_.get_error_type();
+  out.error_message_      = std::string(error_status_.what());
+
+  // Objective, bound and timing.
+  out.objective_        = objective_;
+  out.mip_gap_          = mip_gap_;
+  out.solution_bound_   = solution_bound_;
+  out.total_solve_time_ = total_solve_time_;
+  out.presolve_time_    = presolve_time_;
+
+  // Violation measures and search counters.
+  out.max_constraint_violation_     = max_constraint_violation_;
+  out.max_int_violation_            = max_int_violation_;
+  out.max_variable_bound_violation_ = max_variable_bound_violation_;
+  out.nodes_                        = num_nodes_;
+  out.simplex_iterations_           = num_simplex_iterations_;
+
+  // Solution values: moved rather than copied.
+  out.solution_ = std::move(solution_);
+
+  return out;
+}
+
+// Explicit template instantiations.
+// The conversion members are defined in this translation unit, so each one is instantiated
+// here; only the <int, double> combination is provided.
+template cuopt::cython::linear_programming_ret_t
+gpu_lp_solution_t<int, double>::to_linear_programming_ret_t() &&;
+template cuopt::cython::mip_ret_t gpu_mip_solution_t<int, double>::to_mip_ret_t() &&;
+template cuopt::cython::cpu_linear_programming_ret_t
+cpu_lp_solution_t<int, double>::to_cpu_linear_programming_ret_t() &&;
+template cuopt::cython::cpu_mip_ret_t cpu_mip_solution_t<int, double>::to_cpu_mip_ret_t() &&;
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu
index db15eed82..501895f36 100644
--- a/cpp/src/linear_programming/solve.cu
+++ b/cpp/src/linear_programming/solve.cu
@@ -24,6 +24,9 @@
 #include <mip/solver.cuh>
 #include <mip/utilities/sort_csr.cuh>
 
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/pdlp/solver_settings.hpp>
 #include <cuopt/linear_programming/solve.hpp>
@@ -44,7 +47,10 @@
 #include <raft/core/device_setter.hpp>
 #include <raft/core/handle.hpp>
 
-#include <thread>  // For std::thread
+#include <rmm/cuda_stream.hpp>
+
+#include <iostream>  // For std::cerr
+#include <thread>    // For std::thread
 
 #define CUOPT_LOG_CONDITIONAL_INFO(condition, ...) \
   if ((condition)) { CUOPT_LOG_INFO(__VA_ARGS__); }
@@ -1320,6 +1326,60 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
   return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode);
 }
 
+// ============================================================================
+// Interface-based solve overloads with remote execution support
+// ============================================================================
+
+/**
+ * @brief Solve an LP given through the problem interface, remotely or locally.
+ *
+ * When remote execution is enabled the call is forwarded to the problem's solve_lp_remote()
+ * and only `settings` is used. Otherwise the problem is converted to an optimization_problem_t
+ * and solved with the existing solve_lp() overload; a CPU-backed problem gets its solution
+ * converted back through to_cpu_solution().
+ *
+ * @param problem_interface    Problem to solve; dereferenced without a null check.
+ * @param settings             Solver settings.
+ * @param problem_checking     Forwarded to the local solve_lp() overload.
+ * @param use_pdlp_solver_mode Forwarded to the local solve_lp() overload.
+ * @param is_batch_mode        Forwarded to the local solve_lp() overload.
+ * @return Owning pointer to the solution interface.
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  pdlp_solver_settings_t<i_t, f_t> const& settings,
+  bool problem_checking,
+  bool use_pdlp_solver_mode,
+  bool is_batch_mode)
+{
+  if (is_remote_execution_enabled()) {
+    CUOPT_LOG_INFO("Remote LP solve requested");
+    return problem_interface->solve_lp_remote(settings);
+  }
+
+  CUOPT_LOG_INFO("Local LP solve");
+
+  // A CPU-backed problem (test mode: CUOPT_USE_CPU_MEM_FOR_LOCAL=true) needs CUDA resources
+  // for the conversion, so temporary ones are created here.
+  auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+  if (cpu_prob != nullptr) {
+    CUOPT_LOG_INFO("Test mode: Converting CPU problem to GPU for local solve");
+
+    rmm::cuda_stream stream;
+    raft::handle_t handle(stream);
+
+    // `handle` dies with this scope while the problem belongs to the caller: detach it on
+    // every exit path (return or exception) so the problem never keeps a dangling pointer.
+    // NOTE(review): assumes set_handle(nullptr) is the accepted way to detach - confirm.
+    struct handle_detacher_t {
+      cpu_optimization_problem_t<i_t, f_t>* problem;
+      ~handle_detacher_t() { problem->set_handle(nullptr); }
+    } detacher{cpu_prob};
+    cpu_prob->set_handle(&handle);
+
+    // Convert to a GPU problem and solve it. These locals are declared after `handle`, so they
+    // are destroyed before it.
+    auto op_problem   = cpu_prob->to_optimization_problem();
+    auto gpu_solution = solve_lp<i_t, f_t>(
+      op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+
+    // Logged through the library logger like the other messages of this function.
+    CUOPT_LOG_INFO("Test mode: Converting GPU solution back to CPU solution");
+    gpu_lp_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
+    return gpu_sol_interface.to_cpu_solution();
+  }
+
+  // Any other problem type: convert, solve, and wrap the GPU solution in its interface type.
+  auto op_problem   = problem_interface->to_optimization_problem();
+  auto gpu_solution = solve_lp<i_t, f_t>(
+    op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+  return std::make_unique<gpu_lp_solution_t<i_t, f_t>>(std::move(gpu_solution));
+}
+
 #define INSTANTIATE(F_TYPE)                                                            \
   template optimization_problem_solution_t<int, F_TYPE> solve_lp(                      \
     optimization_problem_t<int, F_TYPE>& op_problem,                                   \
@@ -1335,6 +1395,13 @@ optimization_problem_solution_t<i_t, f_t> solve_lp(
     bool problem_checking,                                                             \
     bool use_pdlp_solver_mode);                                                        \
                                                                                        \
+  template std::unique_ptr<lp_solution_interface_t<int, F_TYPE>> solve_lp(             \
+    optimization_problem_interface_t<int, F_TYPE>*,                                    \
+    pdlp_solver_settings_t<int, F_TYPE> const&,                                        \
+    bool,                                                                              \
+    bool,                                                                              \
+    bool);                                                                             \
+                                                                                       \
   template optimization_problem_solution_t<int, F_TYPE> solve_lp_with_method(          \
     detail::problem_t<int, F_TYPE>& problem,                                           \
     pdlp_solver_settings_t<int, F_TYPE> const& settings,                               \
diff --git a/cpp/src/linear_programming/solve_remote.cu b/cpp/src/linear_programming/solve_remote.cu
new file mode 100644
index 000000000..29a74fd58
--- /dev/null
+++ b/cpp/src/linear_programming/solve_remote.cu
@@ -0,0 +1,286 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/solve.hpp>
+#include <utilities/logger.hpp>
+
+namespace cuopt::linear_programming {
+
+// ============================================================================
+// Remote execution stubs (placeholder implementations)
+// ============================================================================
+
+/**
+ * @brief Placeholder for remote LP solving of a CPU-backed problem.
+ *
+ * No remote call is made yet. The function fabricates an all-zero primal, dual and
+ * reduced-cost solution sized from @p cpu_problem, reports it as Optimal, and attaches warm
+ * start data filled with distinctive non-zero constants so tests can recognize each field.
+ *
+ * @param cpu_problem Problem whose variable and constraint counts size the dummy vectors
+ * @param settings Not read by this stub
+ * @return Owning pointer to a cpu_lp_solution_t holding the fabricated data
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings)
+{
+  CUOPT_LOG_INFO(
+    "solve_lp_remote (CPU problem) stub called - returning dummy solution for testing");
+
+  // TODO: Implement actual remote LP solving via gRPC
+  // Until then everything below is fake data that lets the full flow be exercised.
+  i_t num_vars = cpu_problem.get_n_variables();
+  i_t num_rows = cpu_problem.get_n_constraints();
+
+  // Builds a host vector holding `count` copies of `value`
+  auto filled = [](i_t count, f_t value) { return std::vector<f_t>(count, value); };
+
+  std::vector<f_t> zero_primal(num_vars, 0.0);
+  std::vector<f_t> zero_dual(num_rows, 0.0);
+  std::vector<f_t> zero_reduced_cost(num_vars, 0.0);
+
+  // Fake warm start data: every field gets its own recognizable non-zero value
+  cpu_pdlp_warm_start_data_t<i_t, f_t> fake_warm_start;
+  fake_warm_start.current_primal_solution_                  = filled(num_vars, 1.1);
+  fake_warm_start.current_dual_solution_                    = filled(num_rows, 2.2);
+  fake_warm_start.initial_primal_average_                   = filled(num_vars, 3.3);
+  fake_warm_start.initial_dual_average_                     = filled(num_rows, 4.4);
+  fake_warm_start.current_ATY_                              = filled(num_vars, 5.5);
+  fake_warm_start.sum_primal_solutions_                     = filled(num_vars, 6.6);
+  fake_warm_start.sum_dual_solutions_                       = filled(num_rows, 7.7);
+  fake_warm_start.last_restart_duality_gap_primal_solution_ = filled(num_vars, 8.8);
+  fake_warm_start.last_restart_duality_gap_dual_solution_   = filled(num_rows, 9.9);
+  fake_warm_start.initial_primal_weight_                    = 99.1;
+  fake_warm_start.initial_step_size_                        = 99.2;
+  fake_warm_start.total_pdlp_iterations_                    = 100;
+  fake_warm_start.total_pdhg_iterations_                    = 200;
+  fake_warm_start.last_candidate_kkt_score_                 = 99.3;
+  fake_warm_start.last_restart_kkt_score_                   = 99.4;
+  fake_warm_start.sum_solution_weight_                      = 99.5;
+  fake_warm_start.iterations_since_last_restart_            = 10;
+
+  // The unique_ptr to the concrete CPU solution converts to the interface pointer on return
+  return std::make_unique<cpu_lp_solution_t<i_t, f_t>>(
+    std::move(zero_primal),
+    std::move(zero_dual),
+    std::move(zero_reduced_cost),
+    pdlp_termination_status_t::Optimal,  // Fake optimal status
+    0.0,                                 // Primal objective (zero solution)
+    0.0,                                 // Dual objective (zero solution)
+    0.01,                                // Dummy solve time
+    0.001,                               // l2_primal_residual
+    0.002,                               // l2_dual_residual
+    0.003,                               // gap
+    42,                                  // num_iterations
+    true,                                // solved_by_pdlp
+    std::move(fake_warm_start)           // warmstart data
+  );
+}
+
+/**
+ * @brief Placeholder for remote MIP solving of a CPU-backed problem.
+ *
+ * No remote call is made yet. The function fabricates an all-zero assignment with one entry
+ * per variable of @p cpu_problem and reports it as Optimal with zeroed statistics.
+ *
+ * @param cpu_problem Problem whose variable count sizes the dummy assignment
+ * @param settings Not read by this stub
+ * @return Owning pointer to a cpu_mip_solution_t holding the fabricated data
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  cpu_optimization_problem_t<i_t, f_t>& cpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings)
+{
+  CUOPT_LOG_INFO(
+    "solve_mip_remote (CPU problem) stub called - returning dummy solution for testing");
+
+  // TODO: Implement actual remote MIP solving via gRPC
+  // Until then the result below is fake data that lets the full flow be exercised.
+  i_t num_vars = cpu_problem.get_n_variables();
+  std::vector<f_t> zero_assignment(num_vars, 0.0);
+
+  // The unique_ptr to the concrete CPU solution converts to the interface pointer on return
+  return std::make_unique<cpu_mip_solution_t<i_t, f_t>>(
+    std::move(zero_assignment),
+    mip_termination_status_t::Optimal,  // Fake optimal status
+    0.0,                                // Objective value (zero solution)
+    0.0,                                // MIP gap
+    0.0,                                // Solution bound
+    0.01,                               // Total solve time
+    0.0,                                // Presolve time
+    0.0,                                // Max constraint violation
+    0.0,                                // Max int violation
+    0.0,                                // Max variable bound violation
+    0,                                  // Number of nodes
+    0);                                 // Number of simplex iterations
+}
+
+// ============================================================================
+// Remote execution for GPU problems (converts to CPU then calls CPU remote)
+// ============================================================================
+
+namespace {
+
+/**
+ * @brief Copy the contents of a GPU-backed problem into a CPU-backed problem.
+ *
+ * Shared by the GPU overloads of solve_lp_remote and solve_mip_remote so the conversion is
+ * written once. Scalars and names are always copied; array data is copied only when the
+ * source array is non-empty, and the quadratic objective only when the source reports one.
+ * The setter calls keep a fixed order: scalars, names, objective, constraint matrix,
+ * constraint bounds, variable bounds, variable types, quadratic objective.
+ *
+ * NOTE(review): the objective scaling factor and row types are not transferred here -
+ * confirm whether the remote path needs them.
+ *
+ * @param gpu_problem Source problem; array data is read through its *_host() accessors
+ * @param cpu_problem Destination problem, populated through its setters
+ */
+template <typename i_t, typename f_t>
+void copy_gpu_problem_to_cpu(gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+                             cpu_optimization_problem_t<i_t, f_t>& cpu_problem)
+{
+  // Copy scalar properties
+  cpu_problem.set_maximize(gpu_problem.get_sense());
+  cpu_problem.set_objective_offset(gpu_problem.get_objective_offset());
+  cpu_problem.set_problem_category(gpu_problem.get_problem_category());
+
+  // Copy names
+  cpu_problem.set_problem_name(gpu_problem.get_problem_name());
+  cpu_problem.set_objective_name(gpu_problem.get_objective_name());
+  cpu_problem.set_variable_names(gpu_problem.get_variable_names());
+  cpu_problem.set_row_names(gpu_problem.get_row_names());
+
+  // Copy objective coefficients
+  auto obj_coeffs = gpu_problem.get_objective_coefficients_host();
+  if (!obj_coeffs.empty()) {
+    cpu_problem.set_objective_coefficients(obj_coeffs.data(), obj_coeffs.size());
+  }
+
+  // Copy constraint matrix (CSR format); skipped entirely when there are no values
+  auto matrix_values  = gpu_problem.get_constraint_matrix_values_host();
+  auto matrix_indices = gpu_problem.get_constraint_matrix_indices_host();
+  auto matrix_offsets = gpu_problem.get_constraint_matrix_offsets_host();
+  if (!matrix_values.empty()) {
+    cpu_problem.set_csr_constraint_matrix(matrix_values.data(),
+                                          matrix_values.size(),
+                                          matrix_indices.data(),
+                                          matrix_indices.size(),
+                                          matrix_offsets.data(),
+                                          matrix_offsets.size());
+  }
+
+  // Copy constraint bounds
+  auto constraint_lb = gpu_problem.get_constraint_lower_bounds_host();
+  auto constraint_ub = gpu_problem.get_constraint_upper_bounds_host();
+  if (!constraint_lb.empty()) {
+    cpu_problem.set_constraint_lower_bounds(constraint_lb.data(), constraint_lb.size());
+  }
+  if (!constraint_ub.empty()) {
+    cpu_problem.set_constraint_upper_bounds(constraint_ub.data(), constraint_ub.size());
+  }
+
+  // Copy variable bounds
+  auto var_lb = gpu_problem.get_variable_lower_bounds_host();
+  auto var_ub = gpu_problem.get_variable_upper_bounds_host();
+  if (!var_lb.empty()) { cpu_problem.set_variable_lower_bounds(var_lb.data(), var_lb.size()); }
+  if (!var_ub.empty()) { cpu_problem.set_variable_upper_bounds(var_ub.data(), var_ub.size()); }
+
+  // Copy variable types
+  auto var_types = gpu_problem.get_variable_types_host();
+  if (!var_types.empty()) { cpu_problem.set_variable_types(var_types.data(), var_types.size()); }
+
+  // Copy quadratic objective if present
+  // NOTE(review): these accessors carry no _host suffix; presumably they already return
+  // host-side containers - confirm.
+  if (gpu_problem.has_quadratic_objective()) {
+    auto quad_offsets = gpu_problem.get_quadratic_objective_offsets();
+    auto quad_indices = gpu_problem.get_quadratic_objective_indices();
+    auto quad_values  = gpu_problem.get_quadratic_objective_values();
+    cpu_problem.set_quadratic_objective_matrix(quad_values.data(),
+                                               quad_values.size(),
+                                               quad_indices.data(),
+                                               quad_indices.size(),
+                                               quad_offsets.data(),
+                                               quad_offsets.size());
+  }
+}
+
+}  // namespace
+
+/**
+ * @brief Remote LP solve for a GPU-backed problem.
+ *
+ * Converts the problem to a CPU-backed one, delegates to the CPU overload of
+ * solve_lp_remote, then converts the result back with to_gpu_solution on
+ * rmm::cuda_stream_per_thread and wraps it in a gpu_lp_solution_t.
+ *
+ * @param gpu_problem Problem to convert and solve
+ * @param settings PDLP settings, passed through to the CPU overload
+ * @return Owning pointer to a gpu_lp_solution_t
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<lp_solution_interface_t<i_t, f_t>> solve_lp_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  pdlp_solver_settings_t<i_t, f_t> const& settings)
+{
+  CUOPT_LOG_INFO("solve_lp_remote (GPU problem) - converting to CPU for remote execution");
+
+  // Convert GPU problem to CPU problem
+  auto cpu_problem = cpu_optimization_problem_t<i_t, f_t>(nullptr);  // No CUDA resources for remote
+  copy_gpu_problem_to_cpu(gpu_problem, cpu_problem);
+
+  // Call CPU remote solver (returns unique_ptr<lp_solution_interface_t>)
+  auto cpu_solution_interface = solve_lp_remote(cpu_problem, settings);
+
+  // Convert CPU solution back to GPU solution (since we started with a GPU problem)
+  auto gpu_solution = cpu_solution_interface->to_gpu_solution(rmm::cuda_stream_per_thread);
+  return std::make_unique<gpu_lp_solution_t<i_t, f_t>>(std::move(gpu_solution));
+}
+
+/**
+ * @brief Remote MIP solve for a GPU-backed problem.
+ *
+ * Converts the problem to a CPU-backed one, delegates to the CPU overload of
+ * solve_mip_remote, then converts the result back with to_gpu_solution on
+ * rmm::cuda_stream_per_thread and wraps it in a gpu_mip_solution_t.
+ *
+ * @param gpu_problem Problem to convert and solve
+ * @param settings MIP settings, passed through to the CPU overload
+ * @return Owning pointer to a gpu_mip_solution_t
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip_remote(
+  gpu_optimization_problem_t<i_t, f_t>& gpu_problem,
+  mip_solver_settings_t<i_t, f_t> const& settings)
+{
+  CUOPT_LOG_INFO("solve_mip_remote (GPU problem) - converting to CPU for remote execution");
+
+  // Convert GPU problem to CPU problem
+  auto cpu_problem = cpu_optimization_problem_t<i_t, f_t>(nullptr);  // No CUDA resources for remote
+  copy_gpu_problem_to_cpu(gpu_problem, cpu_problem);
+
+  // Call CPU remote solver (returns unique_ptr<mip_solution_interface_t>)
+  auto cpu_solution_interface = solve_mip_remote(cpu_problem, settings);
+
+  // Convert CPU solution back to GPU solution (since we started with a GPU problem)
+  auto gpu_solution = cpu_solution_interface->to_gpu_solution(rmm::cuda_stream_per_thread);
+  return std::make_unique<gpu_mip_solution_t<i_t, f_t>>(std::move(gpu_solution));
+}
+
+// Explicit template instantiations for the remote execution entry points (<int, double> only):
+// LP and MIP, each for the CPU-problem overload and the GPU-problem overload.
+template std::unique_ptr<lp_solution_interface_t<int, double>> solve_lp_remote(
+  cpu_optimization_problem_t<int, double>&, pdlp_solver_settings_t<int, double> const&);
+
+template std::unique_ptr<mip_solution_interface_t<int, double>> solve_mip_remote(
+  cpu_optimization_problem_t<int, double>&, mip_solver_settings_t<int, double> const&);
+
+template std::unique_ptr<lp_solution_interface_t<int, double>> solve_lp_remote(
+  gpu_optimization_problem_t<int, double>&, pdlp_solver_settings_t<int, double> const&);
+
+template std::unique_ptr<mip_solution_interface_t<int, double>> solve_mip_remote(
+  gpu_optimization_problem_t<int, double>&, mip_solver_settings_t<int, double> const&);
+
+}  // namespace cuopt::linear_programming
diff --git a/cpp/src/linear_programming/solver_settings.cu b/cpp/src/linear_programming/solver_settings.cu
index fc02b6f12..8f1d22827 100644
--- a/cpp/src/linear_programming/solver_settings.cu
+++ b/cpp/src/linear_programming/solver_settings.cu
@@ -361,6 +361,20 @@ pdlp_warm_start_data_t<i_t, f_t>& pdlp_solver_settings_t<i_t, f_t>::get_pdlp_war
   return pdlp_warm_start_data_;
 }
 
+// Read-only access to the stored CPU warm start data.
+template <typename i_t, typename f_t>
+cpu_pdlp_warm_start_data_t<i_t, f_t> const&
+pdlp_solver_settings_t<i_t, f_t>::get_cpu_pdlp_warm_start_data() const noexcept
+{
+  return this->cpu_pdlp_warm_start_data_;
+}
+
+// Mutable access to the stored CPU warm start data.
+template <typename i_t, typename f_t>
+cpu_pdlp_warm_start_data_t<i_t, f_t>&
+pdlp_solver_settings_t<i_t, f_t>::get_cpu_pdlp_warm_start_data() noexcept
+{
+  return this->cpu_pdlp_warm_start_data_;
+}
+
 template <typename i_t, typename f_t>
 const pdlp_warm_start_data_view_t<i_t, f_t>&
 pdlp_solver_settings_t<i_t, f_t>::get_pdlp_warm_start_data_view() const noexcept
diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu
index f49e2057b..07140bac0 100644
--- a/cpp/src/linear_programming/utilities/cython_solve.cu
+++ b/cpp/src/linear_programming/utilities/cython_solve.cu
@@ -6,7 +6,11 @@
 /* clang-format on */
 
 #include <cuopt/error.hpp>
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_utils.hpp>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/solver_settings.hpp>
 #include <cuopt/linear_programming/utilities/cython_solve.hpp>
@@ -29,197 +33,59 @@
 namespace cuopt {
 namespace cython {
 
-using cuopt::linear_programming::var_t;
-
-static cuopt::linear_programming::optimization_problem_t<int, double>
-data_model_to_optimization_problem(
-  cuopt::mps_parser::data_model_view_t<int, double>* data_model,
-  cuopt::linear_programming::solver_settings_t<int, double>* solver_settings,
-  raft::handle_t const* handle_ptr)
-{
-  cuopt::linear_programming::optimization_problem_t<int, double> op_problem(handle_ptr);
-  op_problem.set_maximize(data_model->get_sense());
-  if (data_model->get_constraint_matrix_values().size() != 0 &&
-      data_model->get_constraint_matrix_indices().size() != 0 &&
-      data_model->get_constraint_matrix_offsets().size() != 0) {
-    op_problem.set_csr_constraint_matrix(data_model->get_constraint_matrix_values().data(),
-                                         data_model->get_constraint_matrix_values().size(),
-                                         data_model->get_constraint_matrix_indices().data(),
-                                         data_model->get_constraint_matrix_indices().size(),
-                                         data_model->get_constraint_matrix_offsets().data(),
-                                         data_model->get_constraint_matrix_offsets().size());
-  }
-  if (data_model->get_constraint_bounds().size() != 0) {
-    op_problem.set_constraint_bounds(data_model->get_constraint_bounds().data(),
-                                     data_model->get_constraint_bounds().size());
-  }
-  if (data_model->get_objective_coefficients().size() != 0) {
-    op_problem.set_objective_coefficients(data_model->get_objective_coefficients().data(),
-                                          data_model->get_objective_coefficients().size());
-  }
-  op_problem.set_objective_scaling_factor(data_model->get_objective_scaling_factor());
-  op_problem.set_objective_offset(data_model->get_objective_offset());
-
-  if (data_model->get_quadratic_objective_values().size() != 0 &&
-      data_model->get_quadratic_objective_indices().size() != 0 &&
-      data_model->get_quadratic_objective_offsets().size() != 0) {
-    op_problem.set_quadratic_objective_matrix(data_model->get_quadratic_objective_values().data(),
-                                              data_model->get_quadratic_objective_values().size(),
-                                              data_model->get_quadratic_objective_indices().data(),
-                                              data_model->get_quadratic_objective_indices().size(),
-                                              data_model->get_quadratic_objective_offsets().data(),
-                                              data_model->get_quadratic_objective_offsets().size());
-  }
-  if (data_model->get_variable_lower_bounds().size() != 0) {
-    op_problem.set_variable_lower_bounds(data_model->get_variable_lower_bounds().data(),
-                                         data_model->get_variable_lower_bounds().size());
-  }
-  if (data_model->get_variable_upper_bounds().size() != 0) {
-    op_problem.set_variable_upper_bounds(data_model->get_variable_upper_bounds().data(),
-                                         data_model->get_variable_upper_bounds().size());
-  }
-
-  if (data_model->get_row_types().size() != 0) {
-    op_problem.set_row_types(data_model->get_row_types().data(),
-                             data_model->get_row_types().size());
-  }
-  if (data_model->get_constraint_lower_bounds().size() != 0) {
-    op_problem.set_constraint_lower_bounds(data_model->get_constraint_lower_bounds().data(),
-                                           data_model->get_constraint_lower_bounds().size());
-  }
-  if (data_model->get_constraint_upper_bounds().size() != 0) {
-    op_problem.set_constraint_upper_bounds(data_model->get_constraint_upper_bounds().data(),
-                                           data_model->get_constraint_upper_bounds().size());
-  }
-
-  if (solver_settings->get_pdlp_warm_start_data_view()
-        .last_restart_duality_gap_dual_solution_.data() != nullptr) {
-    // Moved inside
-    cuopt::linear_programming::pdlp_warm_start_data_t<int, double> pdlp_warm_start_data(
-      solver_settings->get_pdlp_warm_start_data_view(), handle_ptr->get_stream());
-    solver_settings->get_pdlp_settings().set_pdlp_warm_start_data(pdlp_warm_start_data);
-  }
-
-  if (data_model->get_variable_types().size() != 0) {
-    std::vector<var_t> enum_variable_types(data_model->get_variable_types().size());
-    std::transform(
-      data_model->get_variable_types().data(),
-      data_model->get_variable_types().data() + data_model->get_variable_types().size(),
-      enum_variable_types.begin(),
-      [](const auto val) -> var_t { return val == 'I' ? var_t::INTEGER : var_t::CONTINUOUS; });
-    op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size());
-  }
-
-  if (data_model->get_variable_names().size() != 0) {
-    op_problem.set_variable_names(data_model->get_variable_names());
-  }
-
-  if (data_model->get_row_names().size() != 0) {
-    op_problem.set_row_names(data_model->get_row_names());
-  }
-
-  return op_problem;
-}
-
 /**
  * @brief Wrapper for linear_programming to expose the API to cython
  *
- * @param data_model Composable data model object
+ * Checks that the interface is non-null and describes an LP, then forwards to solve_lp with
+ * problem checking and PDLP solver mode both enabled. Failed checks are reported through
+ * cuopt_expects as error_type_t::ValidationError.
+ *
+ * @param problem_interface Problem interface (GPU or CPU backend); must not be null
  * @param solver_settings PDLP solver settings object
- * @return linear_programming_ret_t
+ * @param is_batch_mode Forwarded unchanged to solve_lp
+ * @return lp_solution_interface_t pointer (raw pointer, caller owns)
  */
-linear_programming_ret_t call_solve_lp(
-  cuopt::linear_programming::optimization_problem_t<int, double>& op_problem,
+cuopt::linear_programming::lp_solution_interface_t<int, double>* call_solve_lp(
+  cuopt::linear_programming::optimization_problem_interface_t<int, double>* problem_interface,
   cuopt::linear_programming::pdlp_solver_settings_t<int, double>& solver_settings,
   bool is_batch_mode)
 {
-  raft::common::nvtx::range fun_scope("Call Solve");
+  raft::common::nvtx::range fun_scope("Call Solve LP");
+  // Guard the dereference below: the problem arrives here as a raw pointer.
+  cuopt_expects(problem_interface != nullptr,
+                error_type_t::ValidationError,
+                "LP solve requires a non-null problem interface!");
   cuopt_expects(
-    op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::LP,
+    problem_interface->get_problem_category() == cuopt::linear_programming::problem_category_t::LP,
     error_type_t::ValidationError,
     "LP solve cannot be called on a MIP problem!");
   const bool problem_checking     = true;
   const bool use_pdlp_solver_mode = true;
-  auto solution                   = cuopt::linear_programming::solve_lp(
-    op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
-  linear_programming_ret_t lp_ret{
-    std::make_unique<rmm::device_buffer>(solution.get_primal_solution().release()),
-    std::make_unique<rmm::device_buffer>(solution.get_dual_solution().release()),
-    std::make_unique<rmm::device_buffer>(solution.get_reduced_cost().release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().current_primal_solution_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().current_dual_solution_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().initial_primal_average_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().initial_dual_average_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().current_ATY_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()),
-    std::make_unique<rmm::device_buffer>(
-      solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()),
-    solution.get_pdlp_warm_start_data().initial_primal_weight_,
-    solution.get_pdlp_warm_start_data().initial_step_size_,
-    solution.get_pdlp_warm_start_data().total_pdlp_iterations_,
-    solution.get_pdlp_warm_start_data().total_pdhg_iterations_,
-    solution.get_pdlp_warm_start_data().last_candidate_kkt_score_,
-    solution.get_pdlp_warm_start_data().last_restart_kkt_score_,
-    solution.get_pdlp_warm_start_data().sum_solution_weight_,
-    solution.get_pdlp_warm_start_data().iterations_since_last_restart_,
-    solution.get_termination_status(),
-    solution.get_error_status().get_error_type(),
-    solution.get_error_status().what(),
-    solution.get_additional_termination_information().l2_primal_residual,
-    solution.get_additional_termination_information().l2_dual_residual,
-    solution.get_additional_termination_information().primal_objective,
-    solution.get_additional_termination_information().dual_objective,
-    solution.get_additional_termination_information().gap,
-    solution.get_additional_termination_information().number_of_steps_taken,
-    solution.get_additional_termination_information().solve_time,
-    solution.get_additional_termination_information().solved_by_pdlp};
-
-  return lp_ret;
+
+  // Solve returns unique_ptr<lp_solution_interface_t>
+  auto solution_interface = cuopt::linear_programming::solve_lp(
+    problem_interface, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode);
+
+  // Return raw pointer (Python wrapper will own and manage lifecycle)
+  return solution_interface.release();
 }
 
 /**
  * @brief Wrapper for linear_programming to expose the API to cython
  *
- * @param data_model Composable data model object
+ * Checks that the interface is non-null and describes a MIP or IP, then forwards to
+ * solve_mip. Failed checks are reported through cuopt_expects as
+ * error_type_t::ValidationError.
+ *
+ * @param problem_interface Problem interface (GPU or CPU backend); must not be null
  * @param solver_settings MIP solver settings object
- * @return mip_ret_t
+ * @return mip_solution_interface_t pointer (raw pointer, caller owns)
  */
-mip_ret_t call_solve_mip(
-  cuopt::linear_programming::optimization_problem_t<int, double>& op_problem,
+cuopt::linear_programming::mip_solution_interface_t<int, double>* call_solve_mip(
+  cuopt::linear_programming::optimization_problem_interface_t<int, double>* problem_interface,
   cuopt::linear_programming::mip_solver_settings_t<int, double>& solver_settings)
 {
-  raft::common::nvtx::range fun_scope("Call Solve");
-  cuopt_expects(
-    (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP) or
-      (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP),
-    error_type_t::ValidationError,
-    "MIP solve cannot be called on an LP problem!");
-  auto solution = cuopt::linear_programming::solve_mip(op_problem, solver_settings);
-  mip_ret_t mip_ret{std::make_unique<rmm::device_buffer>(solution.get_solution().release()),
-                    solution.get_termination_status(),
-                    solution.get_error_status().get_error_type(),
-                    solution.get_error_status().what(),
-                    solution.get_objective_value(),
-                    solution.get_mip_gap(),
-                    solution.get_solution_bound(),
-                    solution.get_total_solve_time(),
-                    solution.get_presolve_time(),
-                    solution.get_max_constraint_violation(),
-                    solution.get_max_int_violation(),
-                    solution.get_max_variable_bound_violation(),
-                    solution.get_num_nodes(),
-                    solution.get_num_simplex_iterations()};
-  return mip_ret;
+  raft::common::nvtx::range fun_scope("Call Solve MIP");
+  // Guard the dereference below: the problem arrives here as a raw pointer.
+  cuopt_expects(problem_interface != nullptr,
+                error_type_t::ValidationError,
+                "MIP solve requires a non-null problem interface!");
+  cuopt_expects((problem_interface->get_problem_category() ==
+                 cuopt::linear_programming::problem_category_t::MIP) or
+                  (problem_interface->get_problem_category() ==
+                   cuopt::linear_programming::problem_category_t::IP),
+                error_type_t::ValidationError,
+                "MIP solve cannot be called on an LP problem!");
+
+  // Solve returns unique_ptr<mip_solution_interface_t>
+  auto solution_interface =
+    cuopt::linear_programming::solve_mip(problem_interface, solver_settings);
+
+  // Return raw pointer (Python wrapper will own and manage lifecycle)
+  return solution_interface.release();
 }
 
 std::unique_ptr<solver_ret_t> call_solve(
@@ -229,55 +95,104 @@ std::unique_ptr<solver_ret_t> call_solve(
   bool is_batch_mode)
 {
   raft::common::nvtx::range fun_scope("Call Solve");
-  rmm::cuda_stream stream(static_cast<rmm::cuda_stream::flags>(flags));
-  const raft::handle_t handle_{stream};
+
+  // Determine memory backend based on execution mode
+  auto memory_backend = cuopt::linear_programming::get_memory_backend_type();
 
   solver_ret_t response;
 
-  auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_);
-  if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) {
-    response.lp_ret =
-      call_solve_lp(op_problem, solver_settings->get_pdlp_settings(), is_batch_mode);
-    response.problem_type = linear_programming::problem_category_t::LP;
-    // Reset stream to per-thread default as non-blocking stream is out of scope after the
-    // function returns.
-    response.lp_ret.primal_solution_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.dual_solution_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.reduced_cost_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.current_primal_solution_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.current_dual_solution_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.initial_primal_average_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.initial_dual_average_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.current_ATY_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.sum_primal_solutions_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.sum_dual_solutions_->set_stream(rmm::cuda_stream_per_thread);
-    response.lp_ret.last_restart_duality_gap_primal_solution_->set_stream(
-      rmm::cuda_stream_per_thread);
-    response.lp_ret.last_restart_duality_gap_dual_solution_->set_stream(
-      rmm::cuda_stream_per_thread);
-  } else {
-    response.mip_ret      = call_solve_mip(op_problem, solver_settings->get_mip_settings());
-    response.problem_type = linear_programming::problem_category_t::MIP;
-    // Reset stream to per-thread default as non-blocking stream is out of scope after the
-    // function returns.
-    response.mip_ret.solution_->set_stream(rmm::cuda_stream_per_thread);
-  }
+  // Create problem instance and CUDA resources based on memory backend
+  if (memory_backend == cuopt::linear_programming::memory_backend_t::GPU) {
+    // GPU memory backend: Create CUDA resources and GPU problem
+    rmm::cuda_stream stream(static_cast<rmm::cuda_stream::flags>(flags));
+    const raft::handle_t handle_{stream};
+
+    auto gpu_problem = cuopt::linear_programming::gpu_optimization_problem_t<int, double>(&handle_);
+    cuopt::linear_programming::populate_from_data_model_view(
+      &gpu_problem, data_model, solver_settings, &handle_);
+
+    // Call appropriate solve function and convert to ret struct
+    if (gpu_problem.get_problem_category() == linear_programming::problem_category_t::LP) {
+      // Solve and get solution interface pointer
+      auto lp_solution_ptr =
+        std::unique_ptr<linear_programming::lp_solution_interface_t<int, double>>(
+          call_solve_lp(&gpu_problem, solver_settings->get_pdlp_settings(), is_batch_mode));
+
+      response.lp_ret       = std::move(*lp_solution_ptr).to_python_lp_ret();
+      response.problem_type = linear_programming::problem_category_t::LP;
+
+      // Reset stream to per-thread default as non-blocking stream is out of scope after the
+      // function returns.
+      // Note: GPU backend returns linear_programming_ret_t variant
+      auto& lp = std::get<linear_programming_ret_t>(response.lp_ret);
+      lp.primal_solution_->set_stream(rmm::cuda_stream_per_thread);
+      lp.dual_solution_->set_stream(rmm::cuda_stream_per_thread);
+      lp.reduced_cost_->set_stream(rmm::cuda_stream_per_thread);
+      lp.current_primal_solution_->set_stream(rmm::cuda_stream_per_thread);
+      lp.current_dual_solution_->set_stream(rmm::cuda_stream_per_thread);
+      lp.initial_primal_average_->set_stream(rmm::cuda_stream_per_thread);
+      lp.initial_dual_average_->set_stream(rmm::cuda_stream_per_thread);
+      lp.current_ATY_->set_stream(rmm::cuda_stream_per_thread);
+      lp.sum_primal_solutions_->set_stream(rmm::cuda_stream_per_thread);
+      lp.sum_dual_solutions_->set_stream(rmm::cuda_stream_per_thread);
+      lp.last_restart_duality_gap_primal_solution_->set_stream(rmm::cuda_stream_per_thread);
+      lp.last_restart_duality_gap_dual_solution_->set_stream(rmm::cuda_stream_per_thread);
+
+    } else {
+      // MIP solve
+      auto mip_solution_ptr =
+        std::unique_ptr<linear_programming::mip_solution_interface_t<int, double>>(
+          call_solve_mip(&gpu_problem, solver_settings->get_mip_settings()));
+
+      response.mip_ret      = std::move(*mip_solution_ptr).to_python_mip_ret();
+      response.problem_type = linear_programming::problem_category_t::MIP;
+
+      // Reset stream to per-thread default as non-blocking stream is out of scope after the
+      // function returns.
+      // Note: GPU backend returns mip_ret_t variant
+      auto& mip = std::get<mip_ret_t>(response.mip_ret);
+      mip.solution_->set_stream(rmm::cuda_stream_per_thread);
+    }
+
+    // Reset warmstart data streams in solver_settings
+    auto& warmstart_data = solver_settings->get_pdlp_settings().get_pdlp_warm_start_data();
+    if (warmstart_data.current_primal_solution_.size() > 0) {
+      warmstart_data.current_primal_solution_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.current_dual_solution_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.initial_primal_average_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.initial_dual_average_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.current_ATY_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.sum_primal_solutions_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.sum_dual_solutions_.set_stream(rmm::cuda_stream_per_thread);
+      warmstart_data.last_restart_duality_gap_primal_solution_.set_stream(
+        rmm::cuda_stream_per_thread);
+      warmstart_data.last_restart_duality_gap_dual_solution_.set_stream(
+        rmm::cuda_stream_per_thread);
+    }
 
-  // Reset warmstart data streams in solver_settings to per-thread default before destroying our
-  // local stream. The warmstart data was created using our stream and its uvectors are associated
-  // with it.
-  auto& warmstart_data = solver_settings->get_pdlp_settings().get_pdlp_warm_start_data();
-  if (warmstart_data.current_primal_solution_.size() > 0) {
-    warmstart_data.current_primal_solution_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.current_dual_solution_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.initial_primal_average_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.initial_dual_average_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.current_ATY_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.sum_primal_solutions_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.sum_dual_solutions_.set_stream(rmm::cuda_stream_per_thread);
-    warmstart_data.last_restart_duality_gap_primal_solution_.set_stream(
-      rmm::cuda_stream_per_thread);
-    warmstart_data.last_restart_duality_gap_dual_solution_.set_stream(rmm::cuda_stream_per_thread);
+  } else {
+    // CPU memory backend: No CUDA resources, create CPU problem for remote execution
+    auto cpu_problem = cuopt::linear_programming::cpu_optimization_problem_t<int, double>(nullptr);
+    cuopt::linear_programming::populate_from_data_model_view(
+      &cpu_problem, data_model, solver_settings, nullptr);
+
+    // Call appropriate solve function and convert to CPU ret struct
+    if (cpu_problem.get_problem_category() == linear_programming::problem_category_t::LP) {
+      auto lp_solution_ptr =
+        std::unique_ptr<linear_programming::lp_solution_interface_t<int, double>>(
+          call_solve_lp(&cpu_problem, solver_settings->get_pdlp_settings(), is_batch_mode));
+
+      response.lp_ret       = std::move(*lp_solution_ptr).to_python_lp_ret();
+      response.problem_type = linear_programming::problem_category_t::LP;
+
+    } else {
+      auto mip_solution_ptr =
+        std::unique_ptr<linear_programming::mip_solution_interface_t<int, double>>(
+          call_solve_mip(&cpu_problem, solver_settings->get_mip_settings()));
+
+      response.mip_ret      = std::move(*mip_solution_ptr).to_python_mip_ret();
+      response.problem_type = linear_programming::problem_category_t::MIP;
+    }
   }
 
   return std::make_unique<solver_ret_t>(std::move(response));
diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu
index ee852fb29..327c50808 100644
--- a/cpp/src/mip/solve.cu
+++ b/cpp/src/mip/solve.cu
@@ -24,8 +24,10 @@
 #include <utilities/timer.hpp>
 #include <utilities/version_info.hpp>
 
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
 #include <cuopt/linear_programming/mip/solver_settings.hpp>
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh>
 #include <cuopt/linear_programming/solve.hpp>
 #include <cuopt/linear_programming/utilities/internals.hpp>
@@ -37,8 +39,12 @@
 #include <raft/common/nvtx.hpp>
 #include <raft/core/handle.hpp>
 
+#include <rmm/cuda_stream.hpp>
+
 #include <cuda_profiler_api.h>
 
+#include <iostream>  // For std::cerr
+
 namespace cuopt::linear_programming {
 
 // This serves as both a warm up but also a mandatory initial call to setup cuSparse and cuBLAS
@@ -344,15 +350,69 @@ mip_solution_t<i_t, f_t> solve_mip(
   return solve_mip(op_problem, settings);
 }
 
-#define INSTANTIATE(F_TYPE)                                                 \
-  template mip_solution_t<int, F_TYPE> solve_mip(                           \
-    optimization_problem_t<int, F_TYPE>& op_problem,                        \
-    mip_solver_settings_t<int, F_TYPE> const& settings);                    \
-                                                                            \
-  template mip_solution_t<int, F_TYPE> solve_mip(                           \
-    raft::handle_t const* handle_ptr,                                       \
-    const cuopt::mps_parser::mps_data_model_t<int, F_TYPE>& mps_data_model, \
-    mip_solver_settings_t<int, F_TYPE> const& settings);
+/**
+ * @brief Solve a MIP through the polymorphic problem interface.
+ *
+ * When remote execution is enabled the problem's solve_mip_remote() is used. Otherwise the
+ * problem is converted with to_optimization_problem() and solved by the optimization_problem_t
+ * overload. A CPU-backed problem on the local path (test mode) is given a temporary
+ * stream/handle for the conversion and its GPU solution is converted back to a CPU solution.
+ *
+ * @param problem_interface Problem to solve; dereferenced without a null check.
+ * @param settings MIP solver settings, forwarded unchanged to the selected solve path.
+ * @return Owning pointer to the solution interface produced by the selected path.
+ */
+template <typename i_t, typename f_t>
+std::unique_ptr<mip_solution_interface_t<i_t, f_t>> solve_mip(
+  optimization_problem_interface_t<i_t, f_t>* problem_interface,
+  mip_solver_settings_t<i_t, f_t> const& settings)
+{
+  if (is_remote_execution_enabled()) {
+    CUOPT_LOG_INFO("Remote MIP solve requested");
+    return problem_interface->solve_mip_remote(settings);
+  } else {
+    CUOPT_LOG_INFO("Local MIP solve");
+
+    // A CPU-backed problem on the local path is the test mode
+    // (CUOPT_USE_CPU_MEM_FOR_LOCAL): it is converted to a GPU problem before solving.
+    auto* cpu_prob = dynamic_cast<cpu_optimization_problem_t<i_t, f_t>*>(problem_interface);
+    if (cpu_prob != nullptr) {
+      CUOPT_LOG_INFO("Test mode: Converting CPU problem to GPU for local MIP solve");
+
+      // CUDA resources that exist only for this conversion and solve.
+      rmm::cuda_stream stream;
+      raft::handle_t handle(stream);
+
+      // NOTE(review): cpu_prob still points at this local handle after we return -
+      // confirm nothing dereferences it later (or reset it here if set_handle allows null).
+      cpu_prob->set_handle(&handle);
+      auto op_problem   = cpu_prob->to_optimization_problem();
+      auto gpu_solution = solve_mip<i_t, f_t>(op_problem, settings);
+
+      CUOPT_LOG_INFO("Test mode: Converting GPU solution back to CPU solution");
+      gpu_mip_solution_t<i_t, f_t> gpu_sol_interface(std::move(gpu_solution));
+      return gpu_sol_interface.to_cpu_solution();
+    }
+
+    // Any other backend: solve directly and return the GPU solution wrapper.
+    auto op_problem   = problem_interface->to_optimization_problem();
+    auto gpu_solution = solve_mip<i_t, f_t>(op_problem, settings);
+    return std::make_unique<gpu_mip_solution_t<i_t, f_t>>(std::move(gpu_solution));
+  }
+}
+
+#define INSTANTIATE(F_TYPE)                                                  \
+  template mip_solution_t<int, F_TYPE> solve_mip(                            \
+    optimization_problem_t<int, F_TYPE>& op_problem,                         \
+    mip_solver_settings_t<int, F_TYPE> const& settings);                     \
+                                                                             \
+  template mip_solution_t<int, F_TYPE> solve_mip(                            \
+    raft::handle_t const* handle_ptr,                                        \
+    const cuopt::mps_parser::mps_data_model_t<int, F_TYPE>& mps_data_model,  \
+    mip_solver_settings_t<int, F_TYPE> const& settings);                     \
+                                                                             \
+  template std::unique_ptr<mip_solution_interface_t<int, F_TYPE>> solve_mip( \
+    optimization_problem_interface_t<int, F_TYPE>*, mip_solver_settings_t<int, F_TYPE> const&);
 
 #if MIP_INSTANTIATE_FLOAT
 INSTANTIATE(float)
diff --git a/cpp/tests/linear_programming/CMakeLists.txt b/cpp/tests/linear_programming/CMakeLists.txt
index 40e284baf..d1e2673cf 100644
--- a/cpp/tests/linear_programming/CMakeLists.txt
+++ b/cpp/tests/linear_programming/CMakeLists.txt
@@ -6,6 +6,7 @@
 ConfigureTest(LP_UNIT_TEST
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/optimization_problem_test.cu
     ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solver_settings_test.cu
+    ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solution_interface_test.cu
 )# ##################################################################################################
 # - Linear programming PDLP tests ----------------------------------------------------------------------
 ConfigureTest(PDLP_TEST
diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c
index 799a42914..f199fd15a 100644
--- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c
+++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c
@@ -1457,3 +1457,362 @@ cuopt_int_t test_write_problem(const char* input_filename, const char* output_fi
   cuOptDestroySolution(&solution);
   return status;
 }
+
+/**
+ * Solve an LP with PDLP and verify that the MIP-only getters reject the LP solution.
+ *
+ * Both cuOptGetMIPGap and cuOptGetSolutionBound must report CUOPT_INVALID_ARGUMENT.
+ * Returns CUOPT_SUCCESS when they do, -1 when either getter reports anything else,
+ * or the failing cuOpt status if reading, configuring or solving the problem fails.
+ */
+cuopt_int_t test_lp_solution_mip_methods(const char* lp_filename)
+{
+  cuOptOptimizationProblem lp_problem = NULL;
+  cuOptSolverSettings lp_settings     = NULL;
+  cuOptSolution lp_solution           = NULL;
+  cuopt_float_t gap_out;
+  cuopt_float_t bound_out;
+  cuopt_int_t rc;
+
+  printf("Testing LP solution with MIP-only methods...\n");
+
+  /* Setup: any failure here is returned to the caller as-is. */
+  if ((rc = cuOptReadProblem(lp_filename, &lp_problem)) != CUOPT_SUCCESS) {
+    printf("Error reading LP problem: %d\n", rc);
+    goto DONE;
+  }
+  if ((rc = cuOptCreateSolverSettings(&lp_settings)) != CUOPT_SUCCESS) {
+    printf("Error creating solver settings: %d\n", rc);
+    goto DONE;
+  }
+  rc = cuOptSetIntegerParameter(lp_settings, CUOPT_METHOD, CUOPT_METHOD_PDLP);
+  if (rc != CUOPT_SUCCESS) {
+    printf("Error setting method: %d\n", rc);
+    goto DONE;
+  }
+  if ((rc = cuOptSolve(lp_problem, lp_settings, &lp_solution)) != CUOPT_SUCCESS) {
+    printf("Error solving LP: %d\n", rc);
+    goto DONE;
+  }
+
+  /* First MIP-only getter: must be rejected for the LP solution. */
+  rc = cuOptGetMIPGap(lp_solution, &gap_out);
+  if (rc != CUOPT_INVALID_ARGUMENT) {
+    printf("Error: cuOptGetMIPGap on LP should return CUOPT_INVALID_ARGUMENT, got %d\n", rc);
+    rc = -1;
+    goto DONE;
+  }
+
+  /* Second MIP-only getter: same expectation. */
+  rc = cuOptGetSolutionBound(lp_solution, &bound_out);
+  if (rc != CUOPT_INVALID_ARGUMENT) {
+    printf("Error: cuOptGetSolutionBound on LP should return CUOPT_INVALID_ARGUMENT, got %d\n",
+           rc);
+    rc = -1;
+    goto DONE;
+  }
+
+  printf("LP solution MIP methods test passed\n");
+  rc = CUOPT_SUCCESS;
+
+DONE:
+  /* Single exit: handles that were never created are still NULL here. */
+  cuOptDestroyProblem(&lp_problem);
+  cuOptDestroySolverSettings(&lp_settings);
+  cuOptDestroySolution(&lp_solution);
+  return rc;
+}
+
+/**
+ * Solve a MIP and verify that the LP-only getters (dual objective value, dual solution,
+ * reduced costs) each return CUOPT_INVALID_ARGUMENT. Returns CUOPT_SUCCESS if so, -1 on a
+ * different getter status or failed allocation, else the cuOpt status of the failing setup call.
+ */
+cuopt_int_t test_mip_solution_lp_methods(const char* mip_filename)
+{
+  cuOptOptimizationProblem problem = NULL;
+  cuOptSolverSettings settings     = NULL;
+  cuOptSolution solution           = NULL;
+  cuopt_int_t status;
+  cuopt_float_t dual_objective;
+  cuopt_float_t* dual_solution = NULL;
+  cuopt_float_t* reduced_costs = NULL;
+  cuopt_int_t num_constraints  = 0;
+  cuopt_int_t num_variables    = 0;
+
+  printf("Testing MIP solution with LP-only methods...\n");
+
+  status = cuOptReadProblem(mip_filename, &problem);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error reading MIP problem: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetNumConstraints(problem, &num_constraints);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting num constraints: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetNumVariables(problem, &num_variables);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting num variables: %d\n", status);
+    goto DONE;
+  }
+
+  dual_solution = (cuopt_float_t*)malloc(num_constraints * sizeof(cuopt_float_t));
+  reduced_costs = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t));
+  if (dual_solution == NULL || reduced_costs == NULL) {
+    printf("Error allocating solution buffers\n");
+    status = -1;
+    goto DONE;
+  }
+
+  status = cuOptCreateSolverSettings(&settings);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error creating solver settings: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptSolve(problem, settings, &solution);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error solving MIP: %d\n", status);
+    goto DONE;
+  }
+
+  /* Each LP-only getter below must be rejected with CUOPT_INVALID_ARGUMENT. */
+  status = cuOptGetDualObjectiveValue(solution, &dual_objective);
+  if (status != CUOPT_INVALID_ARGUMENT) {
+    printf(
+      "Error: cuOptGetDualObjectiveValue on MIP should return CUOPT_INVALID_ARGUMENT, got %d\n",
+      status);
+    status = -1;
+    goto DONE;
+  }
+  status = cuOptGetDualSolution(solution, dual_solution);
+  if (status != CUOPT_INVALID_ARGUMENT) {
+    printf("Error: cuOptGetDualSolution on MIP should return CUOPT_INVALID_ARGUMENT, got %d\n",
+           status);
+    status = -1;
+    goto DONE;
+  }
+  status = cuOptGetReducedCosts(solution, reduced_costs);
+  if (status != CUOPT_INVALID_ARGUMENT) {
+    printf("Error: cuOptGetReducedCosts on MIP should return CUOPT_INVALID_ARGUMENT, got %d\n",
+           status);
+    status = -1;
+    goto DONE;
+  }
+
+  printf("MIP solution LP methods test passed\n");
+  status = CUOPT_SUCCESS;
+
+DONE:
+  free(dual_solution);
+  free(reduced_costs);
+  cuOptDestroyProblem(&problem);
+  cuOptDestroySolverSettings(&settings);
+  cuOptDestroySolution(&solution);
+  return status;
+}
+
+/**
+ * Solve an LP with PDLP and read back termination status, objective value, solve time and
+ * primal solution, returning CUOPT_SUCCESS, -1 on allocation failure, or the failing status.
+ * Meant for CPU-only mode (CUDA_VISIBLE_DEVICES="" plus remote execution): the environment
+ * variables must be set before calling this function.
+ */
+cuopt_int_t test_cpu_only_execution(const char* filename)
+{
+  cuOptOptimizationProblem problem = NULL;
+  cuOptSolverSettings settings     = NULL;
+  cuOptSolution solution           = NULL;
+  cuopt_int_t status;
+  cuopt_int_t termination_status;
+  cuopt_float_t objective_value;
+  cuopt_float_t solve_time;
+  cuopt_int_t num_variables;
+  cuopt_int_t num_constraints;
+  cuopt_float_t* primal_solution = NULL;
+  const char* cuda_visible       = getenv("CUDA_VISIBLE_DEVICES");
+  const char* remote_host        = getenv("CUOPT_REMOTE_HOST");
+
+  printf("Testing CPU-only execution (simulated remote mode)...\n");
+  printf("  CUDA_VISIBLE_DEVICES=%s\n", cuda_visible ? cuda_visible : "(not set)");
+  printf("  CUOPT_REMOTE_HOST=%s\n", remote_host ? remote_host : "(not set)");
+
+  status = cuOptReadProblem(filename, &problem);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error reading problem: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetNumVariables(problem, &num_variables);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting num variables: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetNumConstraints(problem, &num_constraints);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting num constraints: %d\n", status);
+    goto DONE;
+  }
+
+  printf("  Problem: %d variables, %d constraints\n", num_variables, num_constraints);
+
+  primal_solution = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t));
+  if (primal_solution == NULL) {
+    printf("Error allocating primal solution\n");
+    status = -1;
+    goto DONE;
+  }
+
+  status = cuOptCreateSolverSettings(&settings);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error creating solver settings: %d\n", status);
+    goto DONE;
+  }
+
+  status = cuOptSetIntegerParameter(settings, CUOPT_METHOD, CUOPT_METHOD_PDLP);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error setting method: %d\n", status);
+    goto DONE;
+  }
+
+  status = cuOptSolve(problem, settings, &solution);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error solving problem: %d\n", status);
+    goto DONE;
+  }
+
+  /* Verify we can retrieve all solution properties without CUDA errors */
+  status = cuOptGetTerminationStatus(solution, &termination_status);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting termination status: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetObjectiveValue(solution, &objective_value);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting objective value: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetSolveTime(solution, &solve_time);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting solve time: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetPrimalSolution(solution, primal_solution);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting primal solution: %d\n", status);
+    goto DONE;
+  }
+
+  printf("CPU-only execution test passed\n");
+  printf("  Termination status: %s\n", termination_status_to_string(termination_status));
+  printf("  Objective value: %f\n", objective_value);
+  printf("  Solve time: %f\n", solve_time);
+  if (num_variables > 0) { /* an empty solution has no element 0 to print */
+    printf("  Primal solution[0]: %f\n", primal_solution[0]);
+  }
+
+  status = CUOPT_SUCCESS;
+
+DONE:
+  free(primal_solution);
+  cuOptDestroyProblem(&problem);
+  cuOptDestroySolverSettings(&settings);
+  cuOptDestroySolution(&solution);
+  return status;
+}
+
+/**
+ * Solve a MIP (60 s time limit) in CPU-only mode (environment expected to be set up by the
+ * caller) and read back termination status, objective value, solve time, MIP gap and primal
+ * solution. Returns CUOPT_SUCCESS, -1 on allocation failure, or the failing cuOpt status.
+ */
+cuopt_int_t test_cpu_only_mip_execution(const char* filename)
+{
+  cuOptOptimizationProblem problem = NULL;
+  cuOptSolverSettings settings     = NULL;
+  cuOptSolution solution           = NULL;
+  cuopt_int_t status;
+  cuopt_int_t termination_status;
+  cuopt_float_t objective_value;
+  cuopt_float_t solve_time;
+  cuopt_float_t mip_gap;
+  cuopt_int_t num_variables;
+  cuopt_float_t* primal_solution = NULL;
+
+  printf("Testing CPU-only MIP execution (simulated remote mode)...\n");
+
+  status = cuOptReadProblem(filename, &problem);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error reading MIP problem: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetNumVariables(problem, &num_variables);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting num variables: %d\n", status);
+    goto DONE;
+  }
+
+  primal_solution = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t));
+  if (primal_solution == NULL) {
+    printf("Error allocating primal solution\n");
+    status = -1;
+    goto DONE;
+  }
+
+  status = cuOptCreateSolverSettings(&settings);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error creating solver settings: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 60.0);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error setting time limit: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptSolve(problem, settings, &solution);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error solving MIP: %d\n", status);
+    goto DONE;
+  }
+
+  /* Every getter must succeed on the returned MIP solution. */
+  status = cuOptGetTerminationStatus(solution, &termination_status);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting termination status: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetObjectiveValue(solution, &objective_value);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting objective value: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetSolveTime(solution, &solve_time);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting solve time: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetMIPGap(solution, &mip_gap);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting MIP gap: %d\n", status);
+    goto DONE;
+  }
+  status = cuOptGetPrimalSolution(solution, primal_solution);
+  if (status != CUOPT_SUCCESS) {
+    printf("Error getting primal solution: %d\n", status);
+    goto DONE;
+  }
+
+  printf("CPU-only MIP execution test passed\n");
+  printf("  Termination status: %s\n", termination_status_to_string(termination_status));
+  printf("  Objective value: %f\n", objective_value);
+  printf("  MIP gap: %f\n", mip_gap);
+  printf("  Solve time: %f\n", solve_time);
+  status = CUOPT_SUCCESS;
+
+DONE:
+  free(primal_solution);
+  cuOptDestroyProblem(&problem);
+  cuOptDestroySolverSettings(&settings);
+  cuOptDestroySolution(&solution);
+  return status;
+}
diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp
index 273924ec0..e133ce262 100644
--- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp
+++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp
@@ -7,8 +7,10 @@
 
 #include "c_api_tests.h"
 
+#include <cstdlib>
 #include <filesystem>
 #include <iostream>
+#include <string>
 
 #include <cuopt/linear_programming/cuopt_c.h>
 #include <linear_programming/cuopt_c_internal.hpp>
@@ -178,8 +180,9 @@ static bool test_mps_roundtrip(const std::string& mps_file_path)
     auto* original_problem_wrapper = static_cast<problem_and_stream_view_t*>(original_handle);
     auto* reread_problem_wrapper   = static_cast<problem_and_stream_view_t*>(reread_handle);
 
-    result =
-      original_problem_wrapper->op_problem->is_equivalent(*reread_problem_wrapper->op_problem);
+    // Use the interface method to compare (works for both CPU and GPU backends)
+    result = original_problem_wrapper->get_problem()->is_equivalent(
+      *reread_problem_wrapper->get_problem());
   }
 
 cleanup:
@@ -225,3 +228,100 @@ INSTANTIATE_TEST_SUITE_P(c_api,
                                            "/mip/enlight_hard.mps",
                                            "/mip/enlight11.mps",
                                            "/mip/supportcase22.mps"));
+
+// =============================================================================
+// Solution Interface Polymorphism Tests
+// =============================================================================
+
+TEST(c_api, lp_solution_mip_methods)
+{
+  const std::string lp_path =
+    cuopt::test::get_rapids_dataset_root_dir() + "/linear_programming/afiro_original.mps";
+  EXPECT_EQ(test_lp_solution_mip_methods(lp_path.c_str()), CUOPT_SUCCESS);
+}
+
+TEST(c_api, mip_solution_lp_methods)
+{
+  const std::string mip_path =
+    cuopt::test::get_rapids_dataset_root_dir() + "/mip/bb_optimality.mps";
+  EXPECT_EQ(test_mip_solution_lp_methods(mip_path.c_str()), CUOPT_SUCCESS);
+}
+
+// =============================================================================
+// CPU-Only Execution Tests
+// These tests verify that cuOpt can run on a CPU-only host with remote execution
+// enabled. The remote solve stubs return dummy results.
+// =============================================================================
+
+// RAII guard that switches the process environment to CPU-only mode for one test:
+// CUDA_VISIBLE_DEVICES="", CUOPT_REMOTE_HOST=localhost and CUOPT_REMOTE_PORT=12345.
+// On destruction each variable gets its previous value back, or is unset again if it
+// was not set before.
+// NOTE(review): CUDA_VISIBLE_DEVICES is presumably only honored when set before the CUDA
+// runtime initializes in this process - confirm the ordering relative to GPU tests.
+class CPUOnlyTestEnvironment {
+ public:
+  CPUOnlyTestEnvironment()
+  {
+    // The saved_var_t members are initialized before this body runs, so the original
+    // values have already been captured when they are overwritten here.
+    setenv("CUDA_VISIBLE_DEVICES", "", 1);
+    setenv("CUOPT_REMOTE_HOST", "localhost", 1);
+    setenv("CUOPT_REMOTE_PORT", "12345", 1);
+  }
+
+  // Non-copyable: a copy would write the saved values back a second time.
+  CPUOnlyTestEnvironment(const CPUOnlyTestEnvironment&)            = delete;
+  CPUOnlyTestEnvironment& operator=(const CPUOnlyTestEnvironment&) = delete;
+
+ private:
+  // Snapshot of a single environment variable; the destructor writes it back.
+  struct saved_var_t {
+    explicit saved_var_t(const char* name) : name_(name)
+    {
+      // getenv() yields nullptr for an unset variable, which must not be confused
+      // with a variable that is set to the empty string.
+      const char* current = getenv(name);
+      was_set_            = (current != nullptr);
+      value_              = was_set_ ? current : "";
+    }
+    saved_var_t(const saved_var_t&) = delete;
+
+    ~saved_var_t()
+    {
+      if (was_set_) {
+        setenv(name_, value_.c_str(), 1);
+      } else {
+        unsetenv(name_);
+      }
+    }
+
+    const char* name_;   // variable name; every use passes a string literal
+    std::string value_;  // original value, empty when the variable was unset
+    bool was_set_;       // whether the variable existed before the override
+  };
+
+  // Restored in reverse declaration order; the three variables are independent.
+  saved_var_t cuda_visible_{"CUDA_VISIBLE_DEVICES"};
+  saved_var_t remote_host_{"CUOPT_REMOTE_HOST"};
+  saved_var_t remote_port_{"CUOPT_REMOTE_PORT"};
+};
+
+TEST(c_api_cpu_only, lp_solve)
+{
+  CPUOnlyTestEnvironment cpu_only_env;
+  const std::string lp_path =
+    cuopt::test::get_rapids_dataset_root_dir() + "/linear_programming/afiro_original.mps";
+  EXPECT_EQ(test_cpu_only_execution(lp_path.c_str()), CUOPT_SUCCESS);
+}
+
+TEST(c_api_cpu_only, mip_solve)
+{
+  CPUOnlyTestEnvironment cpu_only_env;
+  const std::string mip_path =
+    cuopt::test::get_rapids_dataset_root_dir() + "/mip/bb_optimality.mps";
+  EXPECT_EQ(test_cpu_only_mip_execution(mip_path.c_str()), CUOPT_SUCCESS);
+}
+
+// Note: cuopt_cli subprocess tests are in Python (test_cpu_only_execution.py)
+// which provides better cross-platform subprocess handling
diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h
index 179d7deea..e9ef908fa 100644
--- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h
+++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h
@@ -37,6 +37,15 @@ cuopt_int_t test_quadratic_problem(cuopt_int_t* termination_status_ptr,
 cuopt_int_t test_quadratic_ranged_problem(cuopt_int_t* termination_status_ptr,
                                           cuopt_float_t* objective_ptr);
 cuopt_int_t test_write_problem(const char* input_filename, const char* output_filename);
+
+/* Solution interface polymorphism tests; each takes the path of a problem file */
+cuopt_int_t test_lp_solution_mip_methods(const char* lp_filename);
+cuopt_int_t test_mip_solution_lp_methods(const char* mip_filename);
+
+/* CPU-only execution tests; gtest callers set CUDA_VISIBLE_DEVICES="" and CUOPT_REMOTE_HOST/PORT */
+cuopt_int_t test_cpu_only_execution(const char* filename);
+cuopt_int_t test_cpu_only_mip_execution(const char* filename);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu
new file mode 100644
index 000000000..e6d0039fd
--- /dev/null
+++ b/cpp/tests/linear_programming/unit_tests/solution_interface_test.cu
@@ -0,0 +1,556 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+/**
+ * @file solution_interface_test.cu
+ * @brief Tests for optimization_problem_solution_interface_t polymorphic methods
+ *        and GPU/CPU conversion functions
+ *
+ * Tests:
+ * - LP solutions throw std::logic_error when calling MIP-only methods
+ * - MIP solutions throw std::logic_error when calling LP-only methods
+ * - Polymorphic methods work correctly for both LP and MIP solutions
+ * - GPU ↔ CPU problem conversions
+ * - GPU ↔ CPU solution conversions
+ * - GPU ↔ CPU warmstart data conversions
+ * - MPS data model to problem conversions
+ * - Solution to Python return type conversions
+ */
+
+#include <cuopt/linear_programming/cpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/cpu_pdlp_warm_start_data.hpp>
+#include <cuopt/linear_programming/gpu_optimization_problem_solution.hpp>
+#include <cuopt/linear_programming/optimization_problem_interface.hpp>
+#include <cuopt/linear_programming/optimization_problem_utils.hpp>
+#include <cuopt/linear_programming/solve.hpp>
+#include <mps_parser/parser.hpp>
+#include <utilities/common_utils.hpp>
+
+#include <gtest/gtest.h>
+
+#include <stdexcept>
+
+namespace cuopt::linear_programming {
+
+class SolutionInterfaceTest : public ::testing::Test {
+ protected:
+  // Runs before every test: builds the LP and MIP fixture paths under the dataset root
+  void SetUp() override
+  {
+    const std::string dataset_root = cuopt::test::get_rapids_dataset_root_dir();
+    lp_file_  = dataset_root + "/linear_programming/afiro_original.mps";
+    mip_file_ = dataset_root + "/mip/bb_optimality.mps";
+  }
+  std::string lp_file_;   // path of the LP instance (afiro_original.mps)
+  std::string mip_file_;  // path of the MIP instance (bb_optimality.mps)
+};
+
+// An LP result must reject the MIP-only accessors with std::logic_error
+TEST_F(SolutionInterfaceTest, lp_solution_throws_on_mip_methods)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  pdlp_solver_settings_t<int, double> lp_opts;
+  lp_opts.time_limit = 60.0;
+
+  auto lp_result = solve_lp(gpu_problem.get(), lp_opts);
+  ASSERT_NE(lp_result, nullptr);
+
+  // The MIP gap and the solution bound are both expected to be refused for an LP result
+  EXPECT_THROW(lp_result->get_mip_gap(), std::logic_error);
+  EXPECT_THROW(lp_result->get_solution_bound(), std::logic_error);
+}
+
+// A MIP result must reject the LP-only accessors with std::logic_error
+TEST_F(SolutionInterfaceTest, mip_solution_throws_on_lp_methods)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(mip_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  mip_solver_settings_t<int, double> mip_opts;
+  mip_opts.time_limit = 60.0;
+
+  auto mip_result = solve_mip(gpu_problem.get(), mip_opts);
+  ASSERT_NE(mip_result, nullptr);
+
+  // Dual solution, dual objective and reduced costs are expected to be refused for a MIP result
+  EXPECT_THROW(mip_result->get_dual_solution(), std::logic_error);
+  EXPECT_THROW(mip_result->get_dual_objective_value(), std::logic_error);
+  EXPECT_THROW(mip_result->get_reduced_costs(), std::logic_error);
+}
+
+// Through a base-interface pointer, an LP result answers the shared and the LP accessors
+TEST_F(SolutionInterfaceTest, lp_solution_polymorphic_methods)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  pdlp_solver_settings_t<int, double> lp_opts;
+  lp_opts.time_limit = 60.0;
+
+  auto lp_result = solve_lp(gpu_problem.get(), lp_opts);
+  ASSERT_NE(lp_result, nullptr);
+
+  // View the result through the base interface so every call below is dispatched polymorphically
+  optimization_problem_solution_interface_t<int, double>* as_base = lp_result.get();
+
+  EXPECT_FALSE(as_base->is_mip());
+  EXPECT_NO_THROW(as_base->get_error_status());
+  EXPECT_NO_THROW(as_base->get_solve_time());
+  EXPECT_NO_THROW(as_base->get_solution_host());
+  EXPECT_NO_THROW(as_base->get_termination_status_int());
+  EXPECT_NO_THROW(as_base->get_objective_value());
+
+  // Accessors that only make sense for LP must not throw here
+  EXPECT_NO_THROW(as_base->get_dual_solution());
+  EXPECT_NO_THROW(as_base->get_dual_objective_value());
+  EXPECT_NO_THROW(as_base->get_reduced_costs());
+}
+
+// Through a base-interface pointer, a MIP result answers the shared and the MIP accessors
+TEST_F(SolutionInterfaceTest, mip_solution_polymorphic_methods)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(mip_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  mip_solver_settings_t<int, double> mip_opts;
+  mip_opts.time_limit = 60.0;
+
+  auto mip_result = solve_mip(gpu_problem.get(), mip_opts);
+  ASSERT_NE(mip_result, nullptr);
+
+  // View the result through the base interface so every call below is dispatched polymorphically
+  optimization_problem_solution_interface_t<int, double>* as_base = mip_result.get();
+
+  EXPECT_TRUE(as_base->is_mip());
+  EXPECT_NO_THROW(as_base->get_error_status());
+  EXPECT_NO_THROW(as_base->get_solve_time());
+  EXPECT_NO_THROW(as_base->get_solution_host());
+  EXPECT_NO_THROW(as_base->get_termination_status_int());
+  EXPECT_NO_THROW(as_base->get_objective_value());
+
+  // Accessors that only make sense for MIP must not throw here
+  EXPECT_NO_THROW(as_base->get_mip_gap());
+  EXPECT_NO_THROW(as_base->get_solution_bound());
+}
+
+// get_termination_status_int must yield one of the six termination constants listed below
+TEST_F(SolutionInterfaceTest, termination_status_int_values)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  pdlp_solver_settings_t<int, double> lp_opts;
+  lp_opts.time_limit = 60.0;
+
+  auto lp_result = solve_lp(gpu_problem.get(), lp_opts);
+  ASSERT_NE(lp_result, nullptr);
+
+  const int code        = lp_result->get_termination_status_int();
+  const bool recognized = code == CUOPT_TERIMINATION_STATUS_OPTIMAL ||
+                          code == CUOPT_TERIMINATION_STATUS_INFEASIBLE ||
+                          code == CUOPT_TERIMINATION_STATUS_TIME_LIMIT ||
+                          code == CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT ||
+                          code == CUOPT_TERIMINATION_STATUS_NUMERICAL_ERROR ||
+                          code == CUOPT_TERIMINATION_STATUS_NO_TERMINATION;
+  EXPECT_TRUE(recognized);
+}
+
+// =============================================================================
+// Problem Conversion Tests
+// =============================================================================
+
+// Converting a GPU problem with to_optimization_problem keeps variable and constraint counts
+TEST_F(SolutionInterfaceTest, gpu_problem_to_optimization_problem)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t raft_handle;
+
+  auto gpu_problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(gpu_problem.get(), model);
+
+  const int vars_before        = gpu_problem->get_n_variables();
+  const int constraints_before = gpu_problem->get_n_constraints();
+
+  // Counts were captured above because the conversion may consume the source problem
+  auto converted = gpu_problem->to_optimization_problem();
+
+  EXPECT_EQ(converted.get_n_variables(), vars_before);
+  EXPECT_EQ(converted.get_n_constraints(), constraints_before);
+}
+
+// Converting a CPU problem with to_optimization_problem keeps variable and constraint counts
+TEST_F(SolutionInterfaceTest, cpu_problem_to_optimization_problem)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+
+  // The CPU problem is given a raft handle here; the conversion below targets the GPU type
+  raft::handle_t raft_handle;
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>(&raft_handle);
+  populate_from_mps_data_model(cpu_problem.get(), model);
+
+  const int vars_before        = cpu_problem->get_n_variables();
+  const int constraints_before = cpu_problem->get_n_constraints();
+
+  // Produce the concrete optimization_problem_t and compare its dimensions
+  auto converted = cpu_problem->to_optimization_problem();
+
+  EXPECT_EQ(converted.get_n_variables(), vars_before);
+  EXPECT_EQ(converted.get_n_constraints(), constraints_before);
+}
+
+// mps_data_model_to_optimization_problem must preserve variable, constraint and nnz counts
+TEST_F(SolutionInterfaceTest, mps_data_model_to_optimization_problem)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t raft_handle;
+
+  auto converted = mps_data_model_to_optimization_problem(&raft_handle, model);
+
+  EXPECT_EQ(converted.get_n_variables(), model.get_n_variables());
+  EXPECT_EQ(converted.get_n_constraints(), model.get_n_constraints());
+  EXPECT_EQ(converted.get_nnz(), model.get_nnz());
+}
+
+// =============================================================================
+// Solution Conversion Tests
+// =============================================================================
+
+// A CPU LP solution converted with to_gpu_solution keeps objective, status and primal size
+TEST_F(SolutionInterfaceTest, cpu_lp_solution_to_gpu)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+
+  // Build the host-side problem from the parsed model
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(cpu_problem.get(), model);
+
+  pdlp_solver_settings_t<int, double> lp_opts;
+  lp_opts.time_limit = 60.0;
+
+  // Solve through the CPU problem's remote entry point
+  auto cpu_result = cpu_problem->solve_lp_remote(lp_opts);
+  ASSERT_NE(cpu_result, nullptr);
+
+  // Snapshot the reference values before converting.
+  // get_objective_value is overloaded, so the id argument (0) is passed explicitly.
+  const double host_objective  = cpu_result->get_objective_value(0);
+  const int host_status        = cpu_result->get_termination_status_int();
+  const size_t host_primal_len = cpu_result->get_primal_solution_host().size();
+
+  // Convert on the per-thread CUDA stream
+  auto on_gpu = cpu_result->to_gpu_solution(rmm::cuda_stream_per_thread);
+
+  // The converted solution must agree with the snapshot
+  EXPECT_NEAR(on_gpu.get_objective_value(0), host_objective, 1e-9);
+  EXPECT_EQ(static_cast<int>(on_gpu.get_termination_status()), host_status);
+  EXPECT_EQ(on_gpu.get_primal_solution().size(), host_primal_len);
+}
+
+// A CPU MIP solution converted with to_gpu_solution keeps its objective and status
+TEST_F(SolutionInterfaceTest, cpu_mip_solution_to_gpu)
+{
+  auto model = cuopt::mps_parser::parse_mps<int, double>(mip_file_);
+
+  // Build the host-side problem from the parsed model
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(cpu_problem.get(), model);
+
+  mip_solver_settings_t<int, double> mip_opts;
+  mip_opts.time_limit = 60.0;
+
+  // Solve through the CPU problem's remote entry point
+  auto cpu_result = cpu_problem->solve_mip_remote(mip_opts);
+  ASSERT_NE(cpu_result, nullptr);
+
+  // Snapshot the reference values before converting
+  const double host_objective = cpu_result->get_objective_value();
+  const int host_status       = cpu_result->get_termination_status_int();
+
+  // Convert on the per-thread CUDA stream
+  auto on_gpu = cpu_result->to_gpu_solution(rmm::cuda_stream_per_thread);
+
+  // The converted solution must agree with the snapshot
+  EXPECT_NEAR(on_gpu.get_objective_value(), host_objective, 1e-9);
+  EXPECT_EQ(static_cast<int>(on_gpu.get_termination_status()), host_status);
+}
+
+// A GPU LP solution must convert to the linear_programming_ret_t alternative of the Python variant
+TEST_F(SolutionInterfaceTest, gpu_lp_solution_to_python_ret)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t handle;
+
+  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  populate_from_mps_data_model(problem.get(), mps_data);
+
+  pdlp_solver_settings_t<int, double> settings;
+  settings.time_limit = 60.0;
+
+  auto solution = solve_lp(problem.get(), settings);
+  ASSERT_NE(solution, nullptr);
+
+  // Read the objective before the solution is moved from (explicit id=0 picks the overload)
+  double orig_objective = solution->get_objective_value(0);
+
+  // Convert to Python return type
+  auto python_ret = std::move(*solution).to_python_lp_ret();
+
+  // Fatal check: std::get below would throw std::bad_variant_access on the wrong alternative
+  ASSERT_TRUE(std::holds_alternative<cuopt::cython::linear_programming_ret_t>(python_ret));
+
+  auto& gpu_ret = std::get<cuopt::cython::linear_programming_ret_t>(python_ret);
+  EXPECT_NEAR(gpu_ret.primal_objective_, orig_objective, 1e-9);
+}
+
+// A CPU LP solution must convert to the cpu_linear_programming_ret_t alternative of the variant
+TEST_F(SolutionInterfaceTest, cpu_lp_solution_to_python_ret)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(cpu_problem.get(), mps_data);
+
+  pdlp_solver_settings_t<int, double> settings;
+  settings.time_limit = 60.0;
+
+  auto cpu_solution = cpu_problem->solve_lp_remote(settings);
+  ASSERT_NE(cpu_solution, nullptr);
+
+  // Read the objective before the solution is moved from (explicit id=0 picks the overload)
+  double orig_objective = cpu_solution->get_objective_value(0);
+
+  // Convert to Python return type
+  auto python_ret = std::move(*cpu_solution).to_python_lp_ret();
+
+  // Fatal check: std::get below would throw std::bad_variant_access on the wrong alternative
+  ASSERT_TRUE(std::holds_alternative<cuopt::cython::cpu_linear_programming_ret_t>(python_ret));
+
+  auto& cpu_ret = std::get<cuopt::cython::cpu_linear_programming_ret_t>(python_ret);
+  EXPECT_NEAR(cpu_ret.primal_objective_, orig_objective, 1e-9);
+}
+
+// A GPU MIP solution must convert to the mip_ret_t alternative of the Python variant
+TEST_F(SolutionInterfaceTest, gpu_mip_solution_to_python_ret)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(mip_file_);
+  raft::handle_t handle;
+
+  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  populate_from_mps_data_model(problem.get(), mps_data);
+
+  mip_solver_settings_t<int, double> settings;
+  settings.time_limit = 60.0;
+
+  auto solution = solve_mip(problem.get(), settings);
+  ASSERT_NE(solution, nullptr);
+
+  double orig_objective = solution->get_objective_value();
+
+  // Convert to Python return type (the objective above was read before the move)
+  auto python_ret = std::move(*solution).to_python_mip_ret();
+
+  // Fatal check: std::get below would throw std::bad_variant_access on the wrong alternative
+  ASSERT_TRUE(std::holds_alternative<cuopt::cython::mip_ret_t>(python_ret));
+
+  auto& gpu_ret = std::get<cuopt::cython::mip_ret_t>(python_ret);
+  EXPECT_NEAR(gpu_ret.objective_, orig_objective, 1e-9);
+}
+
+// A CPU MIP solution must convert to the cpu_mip_ret_t alternative of the Python variant
+TEST_F(SolutionInterfaceTest, cpu_mip_solution_to_python_ret)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(mip_file_);
+
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(cpu_problem.get(), mps_data);
+
+  mip_solver_settings_t<int, double> settings;
+  settings.time_limit = 60.0;
+
+  auto cpu_solution = cpu_problem->solve_mip_remote(settings);
+  ASSERT_NE(cpu_solution, nullptr);
+
+  double orig_objective = cpu_solution->get_objective_value();
+
+  // Convert to Python return type (the objective above was read before the move)
+  auto python_ret = std::move(*cpu_solution).to_python_mip_ret();
+
+  // Fatal check: std::get below would throw std::bad_variant_access on the wrong alternative
+  ASSERT_TRUE(std::holds_alternative<cuopt::cython::cpu_mip_ret_t>(python_ret));
+
+  auto& cpu_ret = std::get<cuopt::cython::cpu_mip_ret_t>(python_ret);
+  EXPECT_NEAR(cpu_ret.objective_, orig_objective, 1e-9);
+}
+
+// =============================================================================
+// Warmstart Data Conversion Tests
+// =============================================================================
+
+// A GPU LP solve that carries warmstart data must expose non-empty host copies of its iterates
+TEST_F(SolutionInterfaceTest, gpu_warmstart_to_cpu)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t handle;
+
+  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  populate_from_mps_data_model(problem.get(), mps_data);
+
+  pdlp_solver_settings_t<int, double> settings;
+  settings.time_limit      = 60.0;
+  settings.iteration_limit = 100;  // Stop early to ensure warmstart data
+
+  auto solution = solve_lp(problem.get(), settings);
+  ASSERT_NE(solution, nullptr);
+
+  // Report a skip instead of passing vacuously when the solve returned no warmstart data
+  if (!solution->has_warm_start_data()) { GTEST_SKIP() << "no warmstart data returned"; }
+
+  // Host copies of the current warmstart iterates must be non-empty
+  auto current_primal = solution->get_current_primal_solution_host();
+  auto current_dual   = solution->get_current_dual_solution_host();
+
+  EXPECT_GT(current_primal.size(), 0u);
+  EXPECT_GT(current_dual.size(), 0u);
+}
+
+// A CPU LP solution that carries warmstart data must still convert to a usable GPU solution
+TEST_F(SolutionInterfaceTest, cpu_warmstart_to_gpu)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+
+  auto cpu_problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(cpu_problem.get(), mps_data);
+
+  pdlp_solver_settings_t<int, double> settings;
+  settings.time_limit      = 60.0;
+  settings.iteration_limit = 100;  // Stop early to ensure warmstart data
+
+  auto cpu_solution = cpu_problem->solve_lp_remote(settings);
+  ASSERT_NE(cpu_solution, nullptr);
+
+  // Report a skip instead of passing vacuously when the solve returned no warmstart data
+  if (!cpu_solution->has_warm_start_data()) { GTEST_SKIP() << "no warmstart data returned"; }
+
+  // Host-side warmstart iterates must be non-empty
+  auto current_primal = cpu_solution->get_current_primal_solution_host();
+  auto current_dual   = cpu_solution->get_current_dual_solution_host();
+  EXPECT_GT(current_primal.size(), 0u);
+  EXPECT_GT(current_dual.size(), 0u);
+
+  // Record the host primal size first, then convert and check that it survives the conversion.
+  // TODO: compare the warmstart buffers themselves once the GPU-side accessor is confirmed.
+  const size_t host_primal_size = cpu_solution->get_primal_solution_host().size();
+
+  auto gpu_solution = cpu_solution->to_gpu_solution(rmm::cuda_stream_per_thread);
+  EXPECT_EQ(gpu_solution.get_primal_solution().size(), host_primal_size);
+}
+
+// =============================================================================
+// Problem Interface Copy Methods Tests
+// =============================================================================
+
+// GPU problem: dimensions match the parsed model and the copy_*_to_host calls fill host buffers
+TEST_F(SolutionInterfaceTest, gpu_problem_copy_to_host_methods)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+  raft::handle_t handle;
+
+  auto problem = std::make_unique<gpu_optimization_problem_t<int, double>>(&handle);
+  populate_from_mps_data_model(problem.get(), mps_data);
+
+  int n_vars        = problem->get_n_variables();
+  int n_constraints = problem->get_n_constraints();
+  int nnz           = problem->get_nnz();
+
+  // Dimensions reported by the problem must match the parsed MPS model
+  EXPECT_EQ(n_vars, mps_data.get_n_variables());
+  EXPECT_EQ(n_constraints, mps_data.get_n_constraints());
+  EXPECT_EQ(nnz, mps_data.get_nnz());
+
+  // Objective coefficients (smoke test only: contents are not compared against a reference)
+  std::vector<double> obj_coeffs(n_vars);
+  problem->copy_objective_coefficients_to_host(obj_coeffs.data(), n_vars);
+
+  // Variable lower and upper bounds (smoke test only)
+  std::vector<double> var_lb(n_vars);
+  std::vector<double> var_ub(n_vars);
+  problem->copy_variable_lower_bounds_to_host(var_lb.data(), n_vars);
+  problem->copy_variable_upper_bounds_to_host(var_ub.data(), n_vars);
+
+  // Constraint bounds (smoke test only)
+  std::vector<double> rhs(n_constraints);
+  problem->copy_constraint_bounds_to_host(rhs.data(), n_constraints);
+
+  // Constraint matrix: offsets start at -1 so an unwritten buffer fails (assumes CSR offsets)
+  std::vector<double> values(nnz);
+  std::vector<int> col_indices(nnz);
+  std::vector<int> row_offsets(n_constraints + 1, -1);
+  problem->copy_constraint_matrix_to_host(
+    values.data(), col_indices.data(), row_offsets.data(), nnz, nnz, n_constraints + 1);
+  EXPECT_EQ(row_offsets.front(), 0);
+  EXPECT_EQ(row_offsets.back(), nnz);
+}
+
+// CPU problem: dimensions match the parsed model and the copy_*_to_host calls fill host buffers
+TEST_F(SolutionInterfaceTest, cpu_problem_copy_to_host_methods)
+{
+  auto mps_data = cuopt::mps_parser::parse_mps<int, double>(lp_file_);
+
+  auto problem = std::make_unique<cpu_optimization_problem_t<int, double>>();
+  populate_from_mps_data_model(problem.get(), mps_data);
+
+  int n_vars        = problem->get_n_variables();
+  int n_constraints = problem->get_n_constraints();
+  int nnz           = problem->get_nnz();
+
+  // Dimensions reported by the problem must match the parsed MPS model
+  EXPECT_EQ(n_vars, mps_data.get_n_variables());
+  EXPECT_EQ(n_constraints, mps_data.get_n_constraints());
+  EXPECT_EQ(nnz, mps_data.get_nnz());
+
+  // Objective coefficients (smoke test only: contents are not compared against a reference)
+  std::vector<double> obj_coeffs(n_vars);
+  problem->copy_objective_coefficients_to_host(obj_coeffs.data(), n_vars);
+
+  // Variable lower and upper bounds (smoke test only)
+  std::vector<double> var_lb(n_vars);
+  std::vector<double> var_ub(n_vars);
+  problem->copy_variable_lower_bounds_to_host(var_lb.data(), n_vars);
+  problem->copy_variable_upper_bounds_to_host(var_ub.data(), n_vars);
+
+  // Constraint bounds (smoke test only)
+  std::vector<double> rhs(n_constraints);
+  problem->copy_constraint_bounds_to_host(rhs.data(), n_constraints);
+
+  // Constraint matrix: offsets start at -1 so an unwritten buffer fails (assumes CSR offsets)
+  std::vector<double> values(nnz);
+  std::vector<int> col_indices(nnz);
+  std::vector<int> row_offsets(n_constraints + 1, -1);
+  problem->copy_constraint_matrix_to_host(
+    values.data(), col_indices.data(), row_offsets.data(), nnz, nnz, n_constraints + 1);
+  EXPECT_EQ(row_offsets.front(), 0);
+  EXPECT_EQ(row_offsets.back(), nnz);
+}
+
+}  // namespace cuopt::linear_programming
diff --git a/python/cuopt/cuopt/__init__.py b/python/cuopt/cuopt/__init__.py
index c6e9150c8..7aefb5dd2 100644
--- a/python/cuopt/cuopt/__init__.py
+++ b/python/cuopt/cuopt/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 try:
@@ -9,5 +9,23 @@
     libcuopt.load_library()
     del libcuopt
 
-from cuopt import linear_programming, routing
 from cuopt._version import __git_commit__, __version__, __version_major_minor__
+
+# linear_programming, routing and distance_engine are imported on first attribute access
+# so that `import cuopt` itself works on CPU-only hosts configured for remote solve
+_submodules = ["linear_programming", "routing", "distance_engine"]
+
+
+def __getattr__(name):
+    """Import and return cuopt.<name> for a lazy submodule; raise AttributeError otherwise."""
+    if name not in _submodules:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    import importlib
+    return importlib.import_module(f"cuopt.{name}")
+
+
+def __dir__():
+    return [*__all__, *_submodules]
+
+
+__all__ = ["__git_commit__", "__version__", "__version_major_minor__"]
diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd
index 6688e5166..85a2b80aa 100644
--- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd
+++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd
@@ -119,7 +119,19 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_solution.hpp" namespace "
         PrimalFeasible "cuopt::linear_programming::pdlp_termination_status_t::PrimalFeasible" # noqa
 
 
+cdef extern from "<variant>" namespace "std":
+    # Two-alternative std::variant: only default and per-alternative construction are declared
+    cdef cppclass variant[T1, T2]:
+        variant() except +
+        variant(T1&) except +
+        variant(T2&) except +
+
+cdef extern from "<rmm/device_buffer.hpp>" namespace "rmm":
+    cdef cppclass device_buffer:
+        pass
+
 cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace "cuopt::cython": # noqa
+    # GPU-backed LP solution struct (device memory)
     cdef cppclass linear_programming_ret_t:
         unique_ptr[device_buffer] primal_solution_
         unique_ptr[device_buffer] dual_solution_
@@ -155,6 +167,43 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace
         double solve_time_
         bool solved_by_pdlp_
 
+    # Host-memory LP result: std::vector fields where linear_programming_ret_t uses device_buffer
+    cdef cppclass cpu_linear_programming_ret_t:
+        vector[double] primal_solution_
+        vector[double] dual_solution_
+        vector[double] reduced_cost_
+        # Begin PDLP warm start data (vectors first, then scalar state)
+        vector[double] current_primal_solution_
+        vector[double] current_dual_solution_
+        vector[double] initial_primal_average_
+        vector[double] initial_dual_average_
+        vector[double] current_ATY_
+        vector[double] sum_primal_solutions_
+        vector[double] sum_dual_solutions_
+        vector[double] last_restart_duality_gap_primal_solution_
+        vector[double] last_restart_duality_gap_dual_solution_
+        double initial_primal_weight_
+        double initial_step_size_
+        int total_pdlp_iterations_
+        int total_pdhg_iterations_
+        double last_candidate_kkt_score_
+        double last_restart_kkt_score_
+        double sum_solution_weight_
+        int iterations_since_last_restart_
+        # End PDLP warm start data; status, error and summary statistics follow
+        pdlp_termination_status_t termination_status_
+        error_type_t error_status_
+        string error_message_
+        double l2_primal_residual_
+        double l2_dual_residual_
+        double primal_objective_
+        double dual_objective_
+        double gap_
+        int nb_iterations_
+        double solve_time_
+        bool solved_by_pdlp_
+
+    # GPU-backed MIP solution struct (device memory)
     cdef cppclass mip_ret_t:
         unique_ptr[device_buffer] solution_
         mip_termination_status_t termination_status_
@@ -171,10 +220,28 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace
         int nodes_
         int simplex_iterations_
 
+    # Host-memory MIP result: solution_ is a std::vector where mip_ret_t uses a device_buffer
+    cdef cppclass cpu_mip_ret_t:
+        vector[double] solution_
+        mip_termination_status_t termination_status_
+        error_type_t error_status_
+        string error_message_
+        double objective_
+        double mip_gap_
+        double solution_bound_
+        double total_solve_time_
+        double presolve_time_
+        double max_constraint_violation_
+        double max_int_violation_
+        double max_variable_bound_violation_
+        int nodes_
+        int simplex_iterations_
+
+    # Top-level return struct; lp_ret and mip_ret are variants over the GPU- and CPU-backed structs
     cdef cppclass solver_ret_t:
         problem_category_t problem_type
-        linear_programming_ret_t lp_ret
-        mip_ret_t mip_ret
+        variant[linear_programming_ret_t, cpu_linear_programming_ret_t] lp_ret
+        variant[mip_ret_t, cpu_mip_ret_t] mip_ret
 
     cdef unique_ptr[solver_ret_t] call_solve(
         data_model_view_t[int, double]* data_model,
@@ -185,3 +252,40 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace
         vector[data_model_view_t[int, double] *] data_models,
         solver_settings_t[int, double]* solver_settings,
     ) except +
+
+# Variant helpers: the variant declaration in this file exposes constructors only, so the
+# active alternative is queried and extracted through the inline C++ functions defined below
+cdef extern from *:
+    """
+    #include <variant>
+    #include <cuopt/linear_programming/utilities/cython_solve.hpp>
+
+    // Check which alternative is active
+    inline bool holds_linear_programming_ret_t(const std::variant<cuopt::cython::linear_programming_ret_t, cuopt::cython::cpu_linear_programming_ret_t>& v) {
+        return std::holds_alternative<cuopt::cython::linear_programming_ret_t>(v);
+    }
+    inline bool holds_mip_ret_t(const std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t>& v) {
+        return std::holds_alternative<cuopt::cython::mip_ret_t>(v);
+    }
+
+    // Get references to the active alternative
+    inline cuopt::cython::linear_programming_ret_t& get_linear_programming_ret_t(std::variant<cuopt::cython::linear_programming_ret_t, cuopt::cython::cpu_linear_programming_ret_t>& v) {
+        return std::get<cuopt::cython::linear_programming_ret_t>(v);
+    }
+    inline cuopt::cython::cpu_linear_programming_ret_t& get_cpu_linear_programming_ret_t(std::variant<cuopt::cython::linear_programming_ret_t, cuopt::cython::cpu_linear_programming_ret_t>& v) {
+        return std::get<cuopt::cython::cpu_linear_programming_ret_t>(v);
+    }
+    inline cuopt::cython::mip_ret_t& get_mip_ret_t(std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t>& v) {
+        return std::get<cuopt::cython::mip_ret_t>(v);
+    }
+    inline cuopt::cython::cpu_mip_ret_t& get_cpu_mip_ret_t(std::variant<cuopt::cython::mip_ret_t, cuopt::cython::cpu_mip_ret_t>& v) {
+        return std::get<cuopt::cython::cpu_mip_ret_t>(v);
+    }
+    """
+    # Cython-side signatures; the get_* helpers use std::get, so test with holds_* first
+    cdef bool holds_linear_programming_ret_t(variant[linear_programming_ret_t, cpu_linear_programming_ret_t]& v)
+    cdef bool holds_mip_ret_t(variant[mip_ret_t, cpu_mip_ret_t]& v)
+    cdef linear_programming_ret_t& get_linear_programming_ret_t(variant[linear_programming_ret_t, cpu_linear_programming_ret_t]& v)
+    cdef cpu_linear_programming_ret_t& get_cpu_linear_programming_ret_t(variant[linear_programming_ret_t, cpu_linear_programming_ret_t]& v)
+    cdef mip_ret_t& get_mip_ret_t(variant[mip_ret_t, cpu_mip_ret_t]& v)
+    cdef cpu_mip_ret_t& get_cpu_mip_ret_t(variant[mip_ret_t, cpu_mip_ret_t]& v)
diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
index 8c1c8fdc3..55800bbc9 100644
--- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
+++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx
@@ -32,7 +32,17 @@ from cuopt.linear_programming.data_model.data_model_wrapper cimport DataModel
 from cuopt.linear_programming.solver.solver cimport (
     call_batch_solve,
     call_solve,
+    cpu_linear_programming_ret_t,
+    cpu_mip_ret_t,
     error_type_t,
+    get_cpu_linear_programming_ret_t,
+    get_cpu_mip_ret_t,
+    get_linear_programming_ret_t,
+    get_mip_ret_t,
+    holds_linear_programming_ret_t,
+    holds_mip_ret_t,
+    linear_programming_ret_t,
+    mip_ret_t,
     mip_termination_status_t,
     pdlp_solver_mode_t,
     pdlp_termination_status_t,
@@ -107,6 +117,15 @@ cdef char* c_get_string(string in_str):
     return c_string
 
 
+cdef object _vector_to_numpy(const vector[double]& vec):
+    """Return a new float64 numpy array holding a copy of vec (an empty vec gives an empty array)."""
+    cdef Py_ssize_t size = vec.size()
+    if size == 0:
+        return np.array([], dtype=np.float64)
+    cdef const double* data_ptr = vec.data()
+    return np.asarray(<double[:size]> data_ptr, dtype=np.float64).copy()
+
+
 def get_data_ptr(array):
     if isinstance(array, cudf.Series):
         return array.__cuda_array_interface__['data'][0]
@@ -276,6 +295,7 @@ cdef set_solver_setting(
                 settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution # noqa
             )
         )
+        ws_data = settings.get_pdlp_warm_start_data()
         c_solver_settings.set_pdlp_warm_start_data(
             <const double *> c_current_primal_solution,
             <const double *> c_current_dual_solution,
@@ -286,16 +306,16 @@ cdef set_solver_setting(
             <const double *> c_sum_dual_solutions,
             <const double *> c_last_restart_duality_gap_primal_solution,
             <const double *> c_last_restart_duality_gap_dual_solution,
-            settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution.shape[0], # Primal size # noqa
-            settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution.shape[0], # Dual size # noqa
-            settings.get_pdlp_warm_start_data().initial_primal_weight,
-            settings.get_pdlp_warm_start_data().initial_step_size,
-            settings.get_pdlp_warm_start_data().total_pdlp_iterations,
-            settings.get_pdlp_warm_start_data().total_pdhg_iterations,
-            settings.get_pdlp_warm_start_data().last_candidate_kkt_score,
-            settings.get_pdlp_warm_start_data().last_restart_kkt_score,
-            settings.get_pdlp_warm_start_data().sum_solution_weight,
-            settings.get_pdlp_warm_start_data().iterations_since_last_restart # noqa
+            ws_data.last_restart_duality_gap_primal_solution.shape[0], # Primal size # noqa
+            ws_data.last_restart_duality_gap_dual_solution.shape[0], # Dual size # noqa
+            ws_data.initial_primal_weight,
+            ws_data.initial_step_size,
+            ws_data.total_pdlp_iterations,
+            ws_data.total_pdhg_iterations,
+            ws_data.last_candidate_kkt_score,
+            ws_data.last_restart_kkt_score,
+            ws_data.sum_solution_weight,
+            ws_data.iterations_since_last_restart # noqa
         )
 
 cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr,
@@ -304,191 +324,297 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr,
 
     from cuopt.linear_programming.solution.solution import Solution
 
-    sol_ret = move(sol_ret_ptr.get()[0])
+    # Access the solver_ret_t struct
+    cdef solver_ret_t* sol_ret = sol_ret_ptr.get()
+
+    # Declare the pointer variables at function scope (Cython does not allow cdef declarations inside control-flow blocks)
+    cdef mip_ret_t* mip_ptr
+    cdef cpu_mip_ret_t* cpu_mip_ptr
+    cdef linear_programming_ret_t* lp_ptr
+    cdef cpu_linear_programming_ret_t* cpu_lp_ptr
 
     if sol_ret.problem_type == ProblemCategory.MIP or sol_ret.problem_type == ProblemCategory.IP: # noqa
-        solution = DeviceBuffer.c_from_unique_ptr(
-            move(sol_ret.mip_ret.solution_)
-        )
-        termination_status = sol_ret.mip_ret.termination_status_
-        error_status = sol_ret.mip_ret.error_status_
-        error_message = sol_ret.mip_ret.error_message_
-        objective = sol_ret.mip_ret.objective_
-        mip_gap = sol_ret.mip_ret.mip_gap_
-        solution_bound = sol_ret.mip_ret.solution_bound_
-        solve_time = sol_ret.mip_ret.total_solve_time_
-        presolve_time = sol_ret.mip_ret.presolve_time_
-        max_constraint_violation = sol_ret.mip_ret.max_constraint_violation_
-        max_int_violation = sol_ret.mip_ret.max_int_violation_
-        max_variable_bound_violation = sol_ret.mip_ret.max_variable_bound_violation_ # noqa
-        num_nodes = sol_ret.mip_ret.nodes_
-        num_simplex_iterations = sol_ret.mip_ret.simplex_iterations_
-
-        solution = series_from_buf(solution, pa.float64()).to_numpy()
-
-        return Solution(
-            ProblemCategory(sol_ret.problem_type),
-            dict(zip(data_model_obj.get_variable_names(), solution)),
-            solve_time,
-            primal_solution=solution,
-            termination_status=MILPTerminationStatus(termination_status),
-            error_status=ErrorStatus(error_status),
-            error_message=str(error_message),
-            primal_objective=objective,
-            mip_gap=mip_gap,
-            solution_bound=solution_bound,
-            presolve_time=presolve_time,
-            max_variable_bound_violation=max_variable_bound_violation,
-            max_int_violation=max_int_violation,
-            max_constraint_violation=max_constraint_violation,
-            num_nodes=num_nodes,
-            num_simplex_iterations=num_simplex_iterations
-        )
+        # MIP solution - check if GPU or CPU
+        if holds_mip_ret_t(sol_ret.mip_ret):
+            # GPU MIP solution - use device_buffer
+            mip_ptr = &get_mip_ret_t(sol_ret.mip_ret)
+            solution_buf = DeviceBuffer.c_from_unique_ptr(move(mip_ptr.solution_))
+            solution = series_from_buf(solution_buf, pa.float64()).to_numpy()
 
-    else:
-        primal_solution = DeviceBuffer.c_from_unique_ptr(
-            move(sol_ret.lp_ret.primal_solution_)
-        )
-        dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa
-        reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa
-
-        primal_solution = series_from_buf(primal_solution, pa.float64()).to_numpy()
-        dual_solution = series_from_buf(dual_solution, pa.float64()).to_numpy()
-        reduced_cost = series_from_buf(reduced_cost, pa.float64()).to_numpy()
-
-        termination_status = sol_ret.lp_ret.termination_status_
-        error_status = sol_ret.lp_ret.error_status_
-        error_message = sol_ret.lp_ret.error_message_
-        l2_primal_residual = sol_ret.lp_ret.l2_primal_residual_
-        l2_dual_residual = sol_ret.lp_ret.l2_dual_residual_
-        primal_objective = sol_ret.lp_ret.primal_objective_
-        dual_objective = sol_ret.lp_ret.dual_objective_
-        gap = sol_ret.lp_ret.gap_
-        nb_iterations = sol_ret.lp_ret.nb_iterations_
-        solve_time = sol_ret.lp_ret.solve_time_
-        solved_by_pdlp = sol_ret.lp_ret.solved_by_pdlp_
-
-        # In BatchSolve, we don't get the warm start data
-        if not is_batch:
-            current_primal_solution = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.current_primal_solution_)
-            )
-            current_dual_solution = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.current_dual_solution_)
-            )
-            initial_primal_average = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.initial_primal_average_)
-            )
-            initial_dual_average = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.initial_dual_average_)
-            )
-            current_ATY = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.current_ATY_)
-            )
-            sum_primal_solutions = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.sum_primal_solutions_)
-            )
-            sum_dual_solutions = DeviceBuffer.c_from_unique_ptr(
-                move(sol_ret.lp_ret.sum_dual_solutions_)
-            )
-            last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa
-                move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_)
-            )
-            last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa
-                move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_)
+            return Solution(
+                ProblemCategory(sol_ret.problem_type),
+                dict(zip(data_model_obj.get_variable_names(), solution)),
+                mip_ptr.total_solve_time_,
+                primal_solution=solution,
+                termination_status=MILPTerminationStatus(mip_ptr.termination_status_),
+                error_status=ErrorStatus(mip_ptr.error_status_),
+                error_message=mip_ptr.error_message_.decode('utf-8'),
+                primal_objective=mip_ptr.objective_,
+                mip_gap=mip_ptr.mip_gap_,
+                solution_bound=mip_ptr.solution_bound_,
+                presolve_time=mip_ptr.presolve_time_,
+                max_variable_bound_violation=mip_ptr.max_variable_bound_violation_,
+                max_int_violation=mip_ptr.max_int_violation_,
+                max_constraint_violation=mip_ptr.max_constraint_violation_,
+                num_nodes=mip_ptr.nodes_,
+                num_simplex_iterations=mip_ptr.simplex_iterations_
             )
-            initial_primal_weight = sol_ret.lp_ret.initial_primal_weight_
-            initial_step_size = sol_ret.lp_ret.initial_step_size_
-            total_pdlp_iterations = sol_ret.lp_ret.total_pdlp_iterations_
-            total_pdhg_iterations = sol_ret.lp_ret.total_pdhg_iterations_
-            last_candidate_kkt_score = sol_ret.lp_ret.last_candidate_kkt_score_
-            last_restart_kkt_score = sol_ret.lp_ret.last_restart_kkt_score_
-            sum_solution_weight = sol_ret.lp_ret.sum_solution_weight_
-            iterations_since_last_restart = sol_ret.lp_ret.iterations_since_last_restart_ # noqa
-
-            current_primal_solution = series_from_buf(
-                current_primal_solution, pa.float64()
-            ).to_numpy()
-            current_dual_solution = series_from_buf(
-                current_dual_solution, pa.float64()
-            ).to_numpy()
-            initial_primal_average = series_from_buf(
-                initial_primal_average, pa.float64()
-            ).to_numpy()
-            initial_dual_average = series_from_buf(
-                initial_dual_average, pa.float64()
-            ).to_numpy()
-            current_ATY = series_from_buf(
-                current_ATY, pa.float64()
-            ).to_numpy()
-            sum_primal_solutions = series_from_buf(
-                sum_primal_solutions, pa.float64()
-            ).to_numpy()
-            sum_dual_solutions = series_from_buf(
-                sum_dual_solutions, pa.float64()
-            ).to_numpy()
-            last_restart_duality_gap_primal_solution = series_from_buf(
-                last_restart_duality_gap_primal_solution,
-                pa.float64()
-            ).to_numpy()
-            last_restart_duality_gap_dual_solution = series_from_buf(
-                last_restart_duality_gap_dual_solution,
-                pa.float64()
-            ).to_numpy()
+        else:
+            # CPU MIP solution - use std::vector
+            cpu_mip_ptr = &get_cpu_mip_ret_t(sol_ret.mip_ret)
+            solution = _vector_to_numpy(cpu_mip_ptr.solution_)
 
             return Solution(
                 ProblemCategory(sol_ret.problem_type),
-                dict(zip(data_model_obj.get_variable_names(), primal_solution)), # noqa
-                solve_time,
-                primal_solution,
-                dual_solution,
-                reduced_cost,
-                current_primal_solution,
-                current_dual_solution,
-                initial_primal_average,
-                initial_dual_average,
-                current_ATY,
-                sum_primal_solutions,
-                sum_dual_solutions,
-                last_restart_duality_gap_primal_solution,
-                last_restart_duality_gap_dual_solution,
-                initial_primal_weight,
-                initial_step_size,
-                total_pdlp_iterations,
-                total_pdhg_iterations,
-                last_candidate_kkt_score,
-                last_restart_kkt_score,
-                sum_solution_weight,
-                iterations_since_last_restart,
-                LPTerminationStatus(termination_status),
-                ErrorStatus(error_status),
-                str(error_message),
-                l2_primal_residual,
-                l2_dual_residual,
-                primal_objective,
-                dual_objective,
-                gap,
-                nb_iterations,
-                solved_by_pdlp,
+                dict(zip(data_model_obj.get_variable_names(), solution)),
+                cpu_mip_ptr.total_solve_time_,
+                primal_solution=solution,
+                termination_status=MILPTerminationStatus(cpu_mip_ptr.termination_status_),
+                error_status=ErrorStatus(cpu_mip_ptr.error_status_),
+                error_message=cpu_mip_ptr.error_message_.decode('utf-8'),
+                primal_objective=cpu_mip_ptr.objective_,
+                mip_gap=cpu_mip_ptr.mip_gap_,
+                solution_bound=cpu_mip_ptr.solution_bound_,
+                presolve_time=cpu_mip_ptr.presolve_time_,
+                max_variable_bound_violation=cpu_mip_ptr.max_variable_bound_violation_,
+                max_int_violation=cpu_mip_ptr.max_int_violation_,
+                max_constraint_violation=cpu_mip_ptr.max_constraint_violation_,
+                num_nodes=cpu_mip_ptr.nodes_,
+                num_simplex_iterations=cpu_mip_ptr.simplex_iterations_
             )
-        return Solution(
-            problem_category=ProblemCategory(sol_ret.problem_type),
-            vars=dict(zip(data_model_obj.get_variable_names(), primal_solution)), # noqa
-            solve_time=solve_time,
-            primal_solution=primal_solution,
-            dual_solution=dual_solution,
-            reduced_cost=reduced_cost,
-            termination_status=LPTerminationStatus(termination_status),
-            error_status=ErrorStatus(error_status),
-            error_message=str(error_message),
-            primal_residual=l2_primal_residual,
-            dual_residual=l2_dual_residual,
-            primal_objective=primal_objective,
-            dual_objective=dual_objective,
-            gap=gap,
-            nb_iterations=nb_iterations,
-            solved_by_pdlp=solved_by_pdlp,
-        )
+
+    else:
+        # LP solution - check if GPU or CPU
+        if holds_linear_programming_ret_t(sol_ret.lp_ret):
+            # GPU LP solution - use device_buffer
+            lp_ptr = &get_linear_programming_ret_t(sol_ret.lp_ret)
+
+            primal_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.primal_solution_))
+            primal_solution = series_from_buf(primal_buf, pa.float64()).to_numpy()
+
+            dual_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.dual_solution_))
+            dual_solution = series_from_buf(dual_buf, pa.float64()).to_numpy()
+
+            reduced_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.reduced_cost_))
+            reduced_cost = series_from_buf(reduced_buf, pa.float64()).to_numpy()
+
+            # Extract warm start data if available (check if buffers are non-empty)
+            if not is_batch:
+                # Check if warm start data exists by checking buffer size
+                if lp_ptr.current_primal_solution_.get()[0].size() > 0:
+                    current_primal_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.current_primal_solution_))
+                    current_primal = series_from_buf(current_primal_buf, pa.float64()).to_numpy()
+
+                    current_dual_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.current_dual_solution_))
+                    current_dual = series_from_buf(current_dual_buf, pa.float64()).to_numpy()
+
+                    initial_primal_avg_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.initial_primal_average_))
+                    initial_primal_avg = series_from_buf(initial_primal_avg_buf, pa.float64()).to_numpy()
+
+                    initial_dual_avg_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.initial_dual_average_))
+                    initial_dual_avg = series_from_buf(initial_dual_avg_buf, pa.float64()).to_numpy()
+
+                    current_ATY_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.current_ATY_))
+                    current_ATY = series_from_buf(current_ATY_buf, pa.float64()).to_numpy()
+
+                    sum_primal_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.sum_primal_solutions_))
+                    sum_primal = series_from_buf(sum_primal_buf, pa.float64()).to_numpy()
+
+                    sum_dual_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.sum_dual_solutions_))
+                    sum_dual = series_from_buf(sum_dual_buf, pa.float64()).to_numpy()
+
+                    last_restart_primal_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.last_restart_duality_gap_primal_solution_))
+                    last_restart_primal = series_from_buf(last_restart_primal_buf, pa.float64()).to_numpy()
+
+                    last_restart_dual_buf = DeviceBuffer.c_from_unique_ptr(move(lp_ptr.last_restart_duality_gap_dual_solution_))
+                    last_restart_dual = series_from_buf(last_restart_dual_buf, pa.float64()).to_numpy()
+
+                    initial_primal_weight = lp_ptr.initial_primal_weight_
+                    initial_step_size = lp_ptr.initial_step_size_
+                    total_pdlp_iters = lp_ptr.total_pdlp_iterations_
+                    total_pdhg_iters = lp_ptr.total_pdhg_iterations_
+                    last_candidate_kkt = lp_ptr.last_candidate_kkt_score_
+                    last_restart_kkt = lp_ptr.last_restart_kkt_score_
+                    sum_weight = lp_ptr.sum_solution_weight_
+                    iters_since_restart = lp_ptr.iterations_since_last_restart_
+                else:
+                    current_primal = None
+                    current_dual = None
+                    initial_primal_avg = None
+                    initial_dual_avg = None
+                    current_ATY = None
+                    sum_primal = None
+                    sum_dual = None
+                    last_restart_primal = None
+                    last_restart_dual = None
+                    initial_primal_weight = 0.0
+                    initial_step_size = 0.0
+                    total_pdlp_iters = 0
+                    total_pdhg_iters = 0
+                    last_candidate_kkt = 0.0
+                    last_restart_kkt = 0.0
+                    sum_weight = 0.0
+                    iters_since_restart = 0
+
+                return Solution(
+                    ProblemCategory(sol_ret.problem_type),
+                    dict(zip(data_model_obj.get_variable_names(), primal_solution)),
+                    lp_ptr.solve_time_,
+                    primal_solution,
+                    dual_solution,
+                    reduced_cost,
+                    current_primal,
+                    current_dual,
+                    initial_primal_avg,
+                    initial_dual_avg,
+                    current_ATY,
+                    sum_primal,
+                    sum_dual,
+                    last_restart_primal,
+                    last_restart_dual,
+                    initial_primal_weight,
+                    initial_step_size,
+                    total_pdlp_iters,
+                    total_pdhg_iters,
+                    last_candidate_kkt,
+                    last_restart_kkt,
+                    sum_weight,
+                    iters_since_restart,
+                    LPTerminationStatus(lp_ptr.termination_status_),
+                    ErrorStatus(lp_ptr.error_status_),
+                    lp_ptr.error_message_.decode('utf-8'),
+                    lp_ptr.l2_primal_residual_,
+                    lp_ptr.l2_dual_residual_,
+                    lp_ptr.primal_objective_,
+                    lp_ptr.dual_objective_,
+                    lp_ptr.gap_,
+                    lp_ptr.nb_iterations_,
+                    lp_ptr.solved_by_pdlp_,
+                )
+            else:
+                # Batch mode - no warm start data is extracted, so only the core LP fields are passed
+                return Solution(
+                    problem_category=ProblemCategory(sol_ret.problem_type),
+                    vars=dict(zip(data_model_obj.get_variable_names(), primal_solution)),
+                    solve_time=lp_ptr.solve_time_,
+                    primal_solution=primal_solution,
+                    dual_solution=dual_solution,
+                    reduced_cost=reduced_cost,
+                    termination_status=LPTerminationStatus(lp_ptr.termination_status_),
+                    error_status=ErrorStatus(lp_ptr.error_status_),
+                    error_message=lp_ptr.error_message_.decode('utf-8'),
+                    primal_residual=lp_ptr.l2_primal_residual_,
+                    dual_residual=lp_ptr.l2_dual_residual_,
+                    primal_objective=lp_ptr.primal_objective_,
+                    dual_objective=lp_ptr.dual_objective_,
+                    gap=lp_ptr.gap_,
+                    nb_iterations=lp_ptr.nb_iterations_,
+                    solved_by_pdlp=lp_ptr.solved_by_pdlp_,
+                )
+
+        else:
+            # CPU LP solution - use std::vector
+            cpu_lp_ptr = &get_cpu_linear_programming_ret_t(sol_ret.lp_ret)
+
+            primal_solution = _vector_to_numpy(cpu_lp_ptr.primal_solution_)
+            dual_solution = _vector_to_numpy(cpu_lp_ptr.dual_solution_)
+            reduced_cost = _vector_to_numpy(cpu_lp_ptr.reduced_cost_)
+
+            # Extract warm start data if available
+            if not is_batch:
+                # Check if warm start data exists by checking vector size
+                if cpu_lp_ptr.current_primal_solution_.size() > 0:
+                    current_primal = _vector_to_numpy(cpu_lp_ptr.current_primal_solution_)
+                    current_dual = _vector_to_numpy(cpu_lp_ptr.current_dual_solution_)
+                    initial_primal_avg = _vector_to_numpy(cpu_lp_ptr.initial_primal_average_)
+                    initial_dual_avg = _vector_to_numpy(cpu_lp_ptr.initial_dual_average_)
+                    current_ATY = _vector_to_numpy(cpu_lp_ptr.current_ATY_)
+                    sum_primal = _vector_to_numpy(cpu_lp_ptr.sum_primal_solutions_)
+                    sum_dual = _vector_to_numpy(cpu_lp_ptr.sum_dual_solutions_)
+                    last_restart_primal = _vector_to_numpy(cpu_lp_ptr.last_restart_duality_gap_primal_solution_)
+                    last_restart_dual = _vector_to_numpy(cpu_lp_ptr.last_restart_duality_gap_dual_solution_)
+                    initial_primal_weight = cpu_lp_ptr.initial_primal_weight_
+                    initial_step_size = cpu_lp_ptr.initial_step_size_
+                    total_pdlp_iters = cpu_lp_ptr.total_pdlp_iterations_
+                    total_pdhg_iters = cpu_lp_ptr.total_pdhg_iterations_
+                    last_candidate_kkt = cpu_lp_ptr.last_candidate_kkt_score_
+                    last_restart_kkt = cpu_lp_ptr.last_restart_kkt_score_
+                    sum_weight = cpu_lp_ptr.sum_solution_weight_
+                    iters_since_restart = cpu_lp_ptr.iterations_since_last_restart_
+                else:
+                    current_primal = None
+                    current_dual = None
+                    initial_primal_avg = None
+                    initial_dual_avg = None
+                    current_ATY = None
+                    sum_primal = None
+                    sum_dual = None
+                    last_restart_primal = None
+                    last_restart_dual = None
+                    initial_primal_weight = 0.0
+                    initial_step_size = 0.0
+                    total_pdlp_iters = 0
+                    total_pdhg_iters = 0
+                    last_candidate_kkt = 0.0
+                    last_restart_kkt = 0.0
+                    sum_weight = 0.0
+                    iters_since_restart = 0
+
+                return Solution(
+                    ProblemCategory(sol_ret.problem_type),
+                    dict(zip(data_model_obj.get_variable_names(), primal_solution)),
+                    cpu_lp_ptr.solve_time_,
+                    primal_solution,
+                    dual_solution,
+                    reduced_cost,
+                    current_primal,
+                    current_dual,
+                    initial_primal_avg,
+                    initial_dual_avg,
+                    current_ATY,
+                    sum_primal,
+                    sum_dual,
+                    last_restart_primal,
+                    last_restart_dual,
+                    initial_primal_weight,
+                    initial_step_size,
+                    total_pdlp_iters,
+                    total_pdhg_iters,
+                    last_candidate_kkt,
+                    last_restart_kkt,
+                    sum_weight,
+                    iters_since_restart,
+                    LPTerminationStatus(cpu_lp_ptr.termination_status_),
+                    ErrorStatus(cpu_lp_ptr.error_status_),
+                    cpu_lp_ptr.error_message_.decode('utf-8'),
+                    cpu_lp_ptr.l2_primal_residual_,
+                    cpu_lp_ptr.l2_dual_residual_,
+                    cpu_lp_ptr.primal_objective_,
+                    cpu_lp_ptr.dual_objective_,
+                    cpu_lp_ptr.gap_,
+                    cpu_lp_ptr.nb_iterations_,
+                    cpu_lp_ptr.solved_by_pdlp_,
+                )
+            else:
+                # Batch mode - no warm start data is extracted, so only the core LP fields are passed
+                return Solution(
+                    problem_category=ProblemCategory(sol_ret.problem_type),
+                    vars=dict(zip(data_model_obj.get_variable_names(), primal_solution)),
+                    solve_time=cpu_lp_ptr.solve_time_,
+                    primal_solution=primal_solution,
+                    dual_solution=dual_solution,
+                    reduced_cost=reduced_cost,
+                    termination_status=LPTerminationStatus(cpu_lp_ptr.termination_status_),
+                    error_status=ErrorStatus(cpu_lp_ptr.error_status_),
+                    error_message=cpu_lp_ptr.error_message_.decode('utf-8'),
+                    primal_residual=cpu_lp_ptr.l2_primal_residual_,
+                    dual_residual=cpu_lp_ptr.l2_dual_residual_,
+                    primal_objective=cpu_lp_ptr.primal_objective_,
+                    dual_objective=cpu_lp_ptr.dual_objective_,
+                    gap=cpu_lp_ptr.gap_,
+                    nb_iterations=cpu_lp_ptr.nb_iterations_,
+                    solved_by_pdlp=cpu_lp_ptr.solved_by_pdlp_,
+                )
 
 
 def Solve(py_data_model_obj, settings, mip=False):
diff --git a/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py b/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py
new file mode 100644
index 000000000..61f905b84
--- /dev/null
+++ b/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py
@@ -0,0 +1,452 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+
+"""
+Tests for CPU-only execution mode.
+
+These tests verify that cuOpt can run on a CPU host without GPU access,
+simulating remote execution scenarios. The tests set CUDA_VISIBLE_DEVICES=""
+and enable remote execution mode to force CPU-only operation.
+
+The tests cover:
+- Problem creation on CPU backend
+- LP solving with CPU backend
+- MIP solving with CPU backend
+- Solution retrieval without CUDA errors
+- Warmstart data handling on CPU
+"""
+
+import os
+import pytest
+import subprocess
+import sys
+
+
+# Get the path to the test MPS files
+RAPIDS_DATASET_ROOT_DIR = os.environ.get(
+    "RAPIDS_DATASET_ROOT_DIR", "/home/datasets/cuopt"
+)
+
+
+class TestCPUOnlyExecution:
+    """Tests that run with CUDA_VISIBLE_DEVICES="" to simulate CPU-only hosts."""
+
+    @pytest.fixture
+    def cpu_only_env(self):
+        """Create environment with no GPU access and remote execution enabled."""
+        env = os.environ.copy()
+        env["CUDA_VISIBLE_DEVICES"] = ""
+        env["CUOPT_REMOTE_HOST"] = "localhost"
+        env["CUOPT_REMOTE_PORT"] = (
+            "12345"  # Fake port, remote not actually called
+        )
+        return env
+
+    def test_lp_solve_cpu_only(self, cpu_only_env):
+        """Test LP solving works in CPU-only mode."""
+        test_script = f'''
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+os.environ["CUOPT_REMOTE_HOST"] = "localhost"
+os.environ["CUOPT_REMOTE_PORT"] = "12345"
+
+from cuopt import linear_programming
+import cuopt_mps_parser
+
+# Parse MPS file
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/linear_programming/afiro_original.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+
+# Create solver settings
+settings = linear_programming.SolverSettings()
+
+# Solve - this should use CPU backend due to remote execution mode
+try:
+    solution = linear_programming.Solve(data_model, settings)
+
+    # Verify we can access solution properties
+    status = solution.get_termination_status()
+    objective = solution.get_primal_objective()
+    solve_time = solution.get_solve_time()
+    primal = solution.get_primal_solution()
+
+    print(f"Status: {{status}}")
+    print(f"Objective: {{objective}}")
+    print(f"Solve time: {{solve_time}}")
+    print(f"Primal size: {{len(primal)}}")
+    print("CPU-only LP solve: PASSED")
+except Exception as e:
+    print(f"CPU-only LP solve: FAILED - {{e}}")
+    raise
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+        )
+        print(result.stdout)
+        print(result.stderr)
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
+
+    def test_mip_solve_cpu_only(self, cpu_only_env):
+        """Test MIP solving works in CPU-only mode."""
+        test_script = f'''
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+os.environ["CUOPT_REMOTE_HOST"] = "localhost"
+os.environ["CUOPT_REMOTE_PORT"] = "12345"
+
+from cuopt import linear_programming
+from cuopt.linear_programming.solver.solver_parameters import CUOPT_TIME_LIMIT
+import cuopt_mps_parser
+
+# Parse MPS file (actual MIP problem with integer variables)
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/mip/bb_optimality.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+
+# Create solver settings
+settings = linear_programming.SolverSettings()
+settings.set_parameter(CUOPT_TIME_LIMIT, 60.0)
+
+# Solve - MIP will be detected from integer variables in the problem
+try:
+    solution = linear_programming.Solve(data_model, settings)
+
+    # Verify we can access solution properties
+    status = solution.get_termination_status()
+    objective = solution.get_primal_objective()
+    solve_time = solution.get_solve_time()
+    solution_values = solution.get_primal_solution()
+
+    print(f"Status: {{status}}")
+    print(f"Objective: {{objective}}")
+    print(f"Solve time: {{solve_time}}")
+    print(f"Solution size: {{len(solution_values)}}")
+    print("CPU-only MIP solve: PASSED")
+except Exception as e:
+    print(f"CPU-only MIP solve: FAILED - {{e}}")
+    raise
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+        )
+        print(result.stdout)
+        print(result.stderr)
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
+
+    def test_lp_dual_solution_cpu_only(self, cpu_only_env):
+        """Test LP dual values and reduced costs are readable in CPU-only mode."""
+        test_script = f'''
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+os.environ["CUOPT_REMOTE_HOST"] = "localhost"
+os.environ["CUOPT_REMOTE_PORT"] = "12345"
+
+from cuopt import linear_programming
+import cuopt_mps_parser
+
+# Parse MPS file
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/linear_programming/afiro_original.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+
+# Create solver settings
+settings = linear_programming.SolverSettings()
+
+try:
+    solution = linear_programming.Solve(data_model, settings)
+
+    # Verify LP-specific properties are accessible
+    dual_solution = solution.get_dual_solution()
+    dual_objective = solution.get_dual_objective()
+    reduced_costs = solution.get_reduced_cost()
+
+    print(f"Dual solution size: {{len(dual_solution)}}")
+    print(f"Dual objective: {{dual_objective}}")
+    print(f"Reduced costs size: {{len(reduced_costs)}}")
+    print("CPU-only LP dual: PASSED")
+except Exception as e:
+    print(f"CPU-only LP dual: FAILED - {{e}}")
+    raise
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+            timeout=300,  # fail instead of hanging CI if the solve stalls
+        )
+        print(result.stdout, result.stderr, sep="\n")
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
+
+    def test_warmstart_cpu_only(self, cpu_only_env):
+        """Test PDLP warm-start data can be fetched and reused in CPU-only mode."""
+        test_script = f'''
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+os.environ["CUOPT_REMOTE_HOST"] = "localhost"
+os.environ["CUOPT_REMOTE_PORT"] = "12345"
+
+from cuopt import linear_programming
+from cuopt.linear_programming.solver.solver_parameters import (
+    CUOPT_METHOD,
+    CUOPT_ITERATION_LIMIT,
+)
+from cuopt.linear_programming.solver_settings import SolverMethod
+import cuopt_mps_parser
+
+# Parse MPS file
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/linear_programming/afiro_original.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+
+# Create solver settings with iteration limit to get warmstart data
+settings = linear_programming.SolverSettings()
+settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP)
+settings.set_parameter(CUOPT_ITERATION_LIMIT, 100)
+
+try:
+    # First solve
+    solution1 = linear_programming.Solve(data_model, settings)
+    warmstart_data = solution1.get_pdlp_warm_start_data()
+
+    if warmstart_data is not None:
+        print(f"Got warmstart data")
+
+        # Second solve with warmstart
+        settings.set_pdlp_warm_start_data(warmstart_data)
+        settings.set_parameter(CUOPT_ITERATION_LIMIT, 200)
+        solution2 = linear_programming.Solve(data_model, settings)
+
+        print(f"Second solve completed")
+        print("CPU-only warmstart: PASSED")
+    else:
+        print("No warmstart data available (expected for small iteration count)")
+        print("CPU-only warmstart: PASSED")
+except Exception as e:
+    print(f"CPU-only warmstart: FAILED - {{e}}")
+    raise
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+            timeout=300,  # fail instead of hanging CI if a solve stalls
+        )
+        print(result.stdout, result.stderr, sep="\n")
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
+
+
+class TestCuoptCliCPUOnly:
+    """Tests that cuopt_cli can run without CUDA resources in remote execution mode."""
+
+    # Substrings that indicate a failed CUDA initialization. Kept on the
+    # class so the LP and MIP tests check the same set; in remote execution
+    # mode none of them should appear in cuopt_cli's output.
+    CUDA_ERRORS = (
+        "CUDA error",
+        "cudaErrorNoDevice",
+        "no CUDA-capable device",
+        "CUDA driver version is insufficient",
+        "CUDA initialization failed",
+    )
+
+    @pytest.fixture
+    def cpu_only_env(self):
+        """Create environment with no GPU access and remote execution enabled."""
+        env = os.environ.copy()
+        env["CUDA_VISIBLE_DEVICES"] = ""
+        env["CUOPT_REMOTE_HOST"] = "localhost"
+        env["CUOPT_REMOTE_PORT"] = "12345"
+        return env
+
+    def _find_cuopt_cli(self):
+        """Return the path of an executable cuopt_cli, or None if not found."""
+        import shutil
+
+        # Check common locations
+        locations = [
+            shutil.which("cuopt_cli"),  # In PATH
+            "./cuopt_cli",  # Current directory
+            "../cpp/build/cuopt_cli",  # Build directory from python tests
+            "../../cpp/build/cuopt_cli",  # Another common relative path
+        ]
+
+        for loc in locations:
+            if loc and os.path.isfile(loc) and os.access(loc, os.X_OK):
+                return os.path.abspath(loc)
+
+        # Try to find in conda environment; require the executable bit here
+        # too, so a non-runnable file leads to a skip instead of a failure.
+        conda_prefix = os.environ.get("CONDA_PREFIX", "")
+        if conda_prefix:
+            conda_cli = os.path.join(conda_prefix, "bin", "cuopt_cli")
+            if os.path.isfile(conda_cli) and os.access(conda_cli, os.X_OK):
+                return conda_cli
+
+        return None
+
+    def test_cuopt_cli_lp_cpu_only(self, cpu_only_env):
+        """Test cuopt_cli LP solve runs without CUDA initialization in CPU-only mode."""
+        cuopt_cli = self._find_cuopt_cli()
+        if cuopt_cli is None:
+            pytest.skip("cuopt_cli not found")
+
+        mps_file = (
+            f"{RAPIDS_DATASET_ROOT_DIR}/linear_programming/afiro_original.mps"
+        )
+        if not os.path.exists(mps_file):
+            pytest.skip(f"Test file not found: {mps_file}")
+
+        cmd = [cuopt_cli, mps_file, "--time-limit", "60"]
+        print(f"Running: {' '.join(cmd)}")
+        print(
+            "Environment: CUDA_VISIBLE_DEVICES='' CUOPT_REMOTE_HOST=localhost"
+        )
+
+        result = subprocess.run(
+            cmd,
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+
+        print("STDOUT:", result.stdout)
+        print("STDERR:", result.stderr)
+        print("Return code:", result.returncode)
+
+        # Check that there are no CUDA initialization errors
+        combined_output = result.stdout + result.stderr
+
+        for error in self.CUDA_ERRORS:
+            assert error not in combined_output, (
+                f"Found CUDA error '{error}' in output - "
+                f"cuopt_cli should not require CUDA in remote execution mode"
+            )
+
+        # The solve should complete (with dummy remote results)
+        # Exit code 0 means success
+        assert result.returncode == 0, (
+            f"cuopt_cli failed with return code {result.returncode}"
+        )
+
+        print("cuopt_cli LP CPU-only test: PASSED")
+
+    def test_cuopt_cli_mip_cpu_only(self, cpu_only_env):
+        """Test cuopt_cli MIP solve runs without CUDA initialization in CPU-only mode."""
+        cuopt_cli = self._find_cuopt_cli()
+        if cuopt_cli is None:
+            pytest.skip("cuopt_cli not found")
+
+        mps_file = f"{RAPIDS_DATASET_ROOT_DIR}/mip/bb_optimality.mps"
+        if not os.path.exists(mps_file):
+            pytest.skip(f"Test file not found: {mps_file}")
+
+        cmd = [cuopt_cli, mps_file, "--time-limit", "60"]
+        print(f"Running: {' '.join(cmd)}")
+
+        result = subprocess.run(
+            cmd,
+            env=cpu_only_env,
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
+
+        print("STDOUT:", result.stdout)
+        print("STDERR:", result.stderr)
+        print("Return code:", result.returncode)
+
+        # Check for CUDA errors
+        combined_output = result.stdout + result.stderr
+
+        for error in self.CUDA_ERRORS:
+            assert error not in combined_output, (
+                f"Found CUDA error '{error}' - should not require CUDA in remote mode"
+            )
+
+        assert result.returncode == 0
+        print("cuopt_cli MIP CPU-only test: PASSED")
+
+
+class TestSolutionInterfacePolymorphism:
+    """Tests that LP and MIP solutions expose their type-specific accessors."""
+
+    def test_lp_solution_has_dual_info(self):
+        """Test that an LP solution has non-empty dual values and reduced costs."""
+        test_script = f'''
+from cuopt import linear_programming
+import cuopt_mps_parser
+
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/linear_programming/afiro_original.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+settings = linear_programming.SolverSettings()
+
+solution = linear_programming.Solve(data_model, settings)
+
+# LP solutions should have dual info
+dual = solution.get_dual_solution()
+assert dual is not None, "Dual solution is None"
+assert len(dual) > 0, "Dual solution is empty"
+
+reduced_costs = solution.get_reduced_cost()
+assert reduced_costs is not None, "Reduced costs is None"
+assert len(reduced_costs) > 0, "Reduced costs is empty"
+
+print("LP solution dual info test: PASSED")
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            capture_output=True,
+            text=True,
+            timeout=300,  # fail instead of hanging CI if the solve stalls
+        )
+        print(result.stdout, result.stderr, sep="\n")
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
+
+    def test_mip_solution_has_mip_info(self):
+        """Test that a MIP solution reports mip_gap and solution_bound stats."""
+        test_script = f'''
+from cuopt import linear_programming
+from cuopt.linear_programming.solver.solver_parameters import CUOPT_TIME_LIMIT
+import cuopt_mps_parser
+
+# Use actual MIP file with integer variables
+mps_file = "{RAPIDS_DATASET_ROOT_DIR}/mip/bb_optimality.mps"
+data_model = cuopt_mps_parser.ParseMps(mps_file)
+settings = linear_programming.SolverSettings()
+settings.set_parameter(CUOPT_TIME_LIMIT, 60.0)
+
+# Solve - MIP will be detected from integer variables
+solution = linear_programming.Solve(data_model, settings)
+
+# MIP solutions should have MIP-specific info via get_milp_stats()
+milp_stats = solution.get_milp_stats()
+mip_gap = milp_stats["mip_gap"]
+solution_bound = milp_stats["solution_bound"]
+
+# Just verify we can call these without errors
+print(f"MIP gap: {{mip_gap}}")
+print(f"Solution bound: {{solution_bound}}")
+print("MIP solution info test: PASSED")
+'''
+        result = subprocess.run(
+            [sys.executable, "-c", test_script],
+            capture_output=True,
+            text=True,
+            timeout=300,  # well above the 60 s solver limit set in the script
+        )
+        print(result.stdout, result.stderr, sep="\n")
+        assert result.returncode == 0, f"Test failed: {result.stderr}"
+        assert "PASSED" in result.stdout
diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
index 09f3c42f4..af002dce2 100644
--- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
+++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 import os
@@ -499,6 +499,7 @@ def test_warm_start():
     data_model_obj = cuopt_mps_parser.ParseMps(file_path)
 
     settings = solver_settings.SolverSettings()
+    settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP)
     settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2)
     settings.set_optimality_tolerance(1e-3)
     settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False)
diff --git a/python/cuopt/cuopt/utilities/utils.py b/python/cuopt/cuopt/utilities/utils.py
index b92968d0e..aec8a7e57 100644
--- a/python/cuopt/cuopt/utilities/utils.py
+++ b/python/cuopt/cuopt/utilities/utils.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 import numpy as np
@@ -6,12 +6,6 @@
 import cudf
 import pylibcudf as plc
 
-from cuopt.linear_programming.solver.solver_parameters import (
-    CUOPT_ABSOLUTE_PRIMAL_TOLERANCE,
-    CUOPT_MIP_INTEGRALITY_TOLERANCE,
-    CUOPT_RELATIVE_PRIMAL_TOLERANCE,
-)
-
 
 def series_from_buf(buf, dtype):
     """Helper function to create a cudf series from a buffer.
@@ -39,6 +33,10 @@ def series_from_buf(buf, dtype):
 
 
 def validate_variable_bounds(data, settings, solution):
+    from cuopt.linear_programming.solver.solver_parameters import (
+        CUOPT_MIP_INTEGRALITY_TOLERANCE,
+    )
+
     integrality_tolerance = settings.get_parameter(
         CUOPT_MIP_INTEGRALITY_TOLERANCE
     )
@@ -110,6 +108,11 @@ def validate_objective_sanity(data, solution, cost, tolerance):
 
 
 def check_solution(data, setting, solution, cost):
+    from cuopt.linear_programming.solver.solver_parameters import (
+        CUOPT_ABSOLUTE_PRIMAL_TOLERANCE,
+        CUOPT_RELATIVE_PRIMAL_TOLERANCE,
+    )
+
     # check size of the solution matches variable size
     assert len(solution) == len(data.get_variable_types())