22 commits
e3bd4f2
Add memory-model handling for LP/MIP solutions
tmckayus Jan 27, 2026
710e2fa
LP/MIP host memory for remote solves and stubs
tmckayus Jan 28, 2026
3d28ef7
python host solutions and lazy import in init
tmckayus Jan 28, 2026
3ec5380
add memory model summary
tmckayus Jan 28, 2026
8e9acc3
fix missing import in linear programming __init__.py
tmckayus Jan 28, 2026
0eb63c7
resolve circular dependency likely exposed by lazy init
tmckayus Jan 28, 2026
f4562b7
fix missing variable types for quadratic problems
tmckayus Jan 28, 2026
7c11efd
add ci fixes for empty mip problems and circular python imports
tmckayus Jan 28, 2026
fd178e6
check_style fix for copyright
tmckayus Jan 29, 2026
a140c20
add fixes for CI tests (empty problems)
tmckayus Jan 29, 2026
6bc8419
Add comprehensive unit tests for memory model changes
tmckayus Jan 29, 2026
780ac29
copyright check_style issues
tmckayus Jan 29, 2026
249d8e2
implement CodeRabbit suggestions on the PR
tmckayus Jan 29, 2026
e602560
Address remaining CodeRabbit suggestions for memory model
tmckayus Jan 29, 2026
0e70c61
Remove auto-generated MANIFEST.in build artifact
tmckayus Jan 29, 2026
3475ced
Fix errors introduced in update to latest release/26.02
tmckayus Jan 29, 2026
f4c73c3
additional updates to memory model changes suggested by CodeRabbit
tmckayus Jan 29, 2026
649191b
Consolidate data model conversion functions and remove unused code
tmckayus Jan 30, 2026
84c081f
Refactor solve helpers to use data_model_view_t and add validation gu…
tmckayus Jan 30, 2026
a032ec6
fix check_style errors and address coderabbit comments
tmckayus Jan 30, 2026
73c899a
Refactor C API to reuse cpu_problem_data_t, eliminating duplicate code
tmckayus Jan 30, 2026
ab14fb0
Add missing include for cuopt_assert macro
tmckayus Jan 30, 2026
78 changes: 78 additions & 0 deletions MEMORY_MODEL_SUMMARY.md
@@ -0,0 +1,78 @@
# Memory Model Summary

This document describes how memory is handled for **local** vs **remote** solves.

## Core idea

The solver now supports **two memory modes** for problem input and solution output:

- **Device (GPU) memory**: used for local solves.
- **Host (CPU) memory**: used for remote solves.

A non-owning `data_model_view_t` (host or device) is the entry point that drives the path, as sketched after this list:
- Device view → local GPU solve
- Host view + `CUOPT_REMOTE_*` → remote solve path
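
To make the dispatch concrete, here is a minimal sketch condensed from the `cpp/cuopt_cli.cpp` changes in this PR. The cuopt entry points (`create_view_from_mps_data_model`, `is_remote_solve_enabled`, `solve_lp`, `solve_mip`, `get_problem_category`) are taken from the diffs below; the raft include path, the `MpsModel` template parameter, and the surrounding function are illustrative assumptions, not the literal CLI code.

```cpp
#include <cuopt/linear_programming/data_model_view.hpp>
#include <cuopt/linear_programming/mip/solver_settings.hpp>
#include <cuopt/linear_programming/optimization_problem_conversions.hpp>
#include <cuopt/linear_programming/solve.hpp>
#include <cuopt/linear_programming/utilities/remote_solve.hpp>
#include <raft/core/handle.hpp>  // assumed include path for raft::handle_t

#include <memory>

namespace lp = cuopt::linear_programming;

// MpsModel stands in for the mps_parser data model type produced by the parser.
template <typename MpsModel>
int solve_any(const MpsModel& mps_data_model)
{
  // Non-owning view; its spans may point at host or device memory.
  auto view = lp::create_view_from_mps_data_model(mps_data_model);

  // Remote solve is keyed off CUOPT_REMOTE_HOST / CUOPT_REMOTE_PORT and is
  // checked before any CUDA work happens.
  const bool remote = lp::is_remote_solve_enabled();

  // Creating a raft handle triggers CUDA initialization, so it is done only
  // on the local path; remote solves pass nullptr instead.
  std::unique_ptr<raft::handle_t> handle;
  if (!remote) { handle = std::make_unique<raft::handle_t>(); }

  lp::solver_settings_t<int, double> settings;
  const auto category = view.get_problem_category();
  const bool is_mip = category == lp::problem_category_t::MIP ||
                      category == lp::problem_category_t::IP;

  auto ok = [](const auto& sol) {
    return sol.get_error_status().get_error_type() == cuopt::error_type_t::Success;
  };
  if (is_mip) {
    return ok(lp::solve_mip(handle.get(), view, settings.get_mip_settings())) ? 0 : -1;
  }
  return ok(lp::solve_lp(handle.get(), view, settings.get_pdlp_settings())) ? 0 : -1;
}
```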

## Local solve (GPU memory)

**C++ entry points** (`solve_lp`, `solve_mip` in `cpp/src/linear_programming/solve.cu` and
`cpp/src/mip/solve.cu`) behave as follows:

1. If the view is device memory, the view is converted into an `optimization_problem_t` and
solved locally.
2. If the view is **host memory**, the data is copied **CPU → GPU** and solved locally.
This path requires a valid `raft::handle_t`.
3. Solutions are returned in **device memory**, and wrappers expose device buffers.

**Python / Cython (`python/cuopt/.../solver_wrapper.pyx`)**:
- `DataModel` is **host-only**: the Cython wrapper accepts `np.ndarray` inputs and raises
if GPU-backed objects are provided.
- For local solves, host data is **copied to GPU** when building the
`optimization_problem_t` (requires a valid `raft::handle_t`).
- The solution is wrapped into `rmm::device_buffer` and converted to NumPy arrays via
`series_from_buf`.

**CLI (`cpp/cuopt_cli.cpp`)**:
- Initializes CUDA/RMM for local solve paths.
- Uses `raft::handle_t` and GPU memory as usual.

## Remote solve (CPU memory)

Remote solve is enabled when **both** `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` are set.
This is detected early in the solve path.
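
As a quick illustration, the gate can be exercised directly; this sketch assumes POSIX `setenv`/`unsetenv`, and the host and port values are placeholders:

```cpp
#include <cuopt/linear_programming/utilities/remote_solve.hpp>

#include <cassert>
#include <cstdlib>

int main()
{
  // Neither variable set: the local GPU path is used.
  unsetenv("CUOPT_REMOTE_HOST");
  unsetenv("CUOPT_REMOTE_PORT");
  assert(!cuopt::linear_programming::is_remote_solve_enabled());

  // Both set: the remote path is taken before any CUDA initialization.
  setenv("CUOPT_REMOTE_HOST", "solver.example.com", 1);
  setenv("CUOPT_REMOTE_PORT", "8765", 1);
  assert(cuopt::linear_programming::is_remote_solve_enabled());
  return 0;
}
```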

**C++ entry points**:
- `solve_lp` / `solve_mip` check `get_remote_solve_config()` first.
- If input data is on **GPU** and remote is enabled, it is copied to CPU for serialization.
- If input data is already on **CPU**, it is passed directly to `solve_*_remote`.
- Remote solve returns **host vectors** and sets `is_device_memory = false`. A caller-side sketch follows this list.
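
From the caller's side the remote path needs no CUDA state at all. A sketch, assuming the view already holds host-memory spans and trimming error handling to the `get_error_status` accessor shown in this PR:

```cpp
#include <cuopt/linear_programming/data_model_view.hpp>
#include <cuopt/linear_programming/solve.hpp>

template <typename i_t, typename f_t>
bool solve_lp_remotely(cuopt::linear_programming::data_model_view_t<i_t, f_t>& view)
{
  // With CUOPT_REMOTE_HOST/PORT set, solve_lp takes the remote branch; no
  // raft::handle_t is needed, so nullptr is passed (as cuopt_cli.cpp does).
  cuopt::linear_programming::solver_settings_t<i_t, f_t> settings;
  auto solution = cuopt::linear_programming::solve_lp(
    nullptr, view, settings.get_pdlp_settings());
  // The returned vectors live in host memory (is_device_memory == false).
  return solution.get_error_status().get_error_type() ==
         cuopt::error_type_t::Success;
}
```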

**Remote stub implementation** (`cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp`):
- Returns **dummy host solutions** (all zeros).
- Sets termination stats to **finite values** (no NaNs) for predictable output.

**Python / Cython (`python/cuopt/.../solver_wrapper.pyx`)**:
- **Input handling**: builds a `data_model_view_t` from the Python `DataModel` before calling C++.
- **Solution handling**: for remote solves, the solution is **host memory**, so NumPy arrays
are built directly from host vectors and **avoid `rmm::device_buffer`** (no CUDA).

**CLI (`cpp/cuopt_cli.cpp`)**:
- Detects remote solve **before** any CUDA initialization.
- Skips `raft::handle_t` creation and GPU setup when remote is enabled.
- Builds the problem in **host memory** for remote solves.

## Batch solve

Batch solve uses the same memory model:

- **Local batch**: GPU memory, with CUDA resources and PDLP/dual simplex paths.
- **Remote batch**: each problem is routed through `solve_lp_remote` or `solve_mip_remote`
  and returns host data. If inputs are already on GPU, they are copied to host first.
  An illustrative caller-side loop follows.
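
Caller-side, a remote batch therefore reduces to a loop of per-problem solves over host views. The loop below is illustrative only; it is not the batch-solve API, and `views` and `settings` are assumed to be set up as in the earlier sketches:

```cpp
// Each call returns host-memory results when remote is enabled.
for (auto& view : views) {
  auto solution = cuopt::linear_programming::solve_lp(
    nullptr, view, settings.get_pdlp_settings());
  // ... collect or inspect each solution here ...
}
```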

## Expected outputs for remote stubs

- Termination status: `Optimal`
- Objective values: `0.0`
- Primal/dual/reduced-cost vectors: zero-filled host arrays

This is useful for verifying the **CPU-only data path** without a remote service.
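
A smoke test of the stub might look like the following. `get_objective_value()` is an assumed accessor name (only `get_error_status()` appears in this PR), so treat it as a sketch:

```cpp
#include <cassert>

// Requires CUOPT_REMOTE_HOST / CUOPT_REMOTE_PORT to be set; no GPU needed.
template <typename i_t, typename f_t>
void check_stub(cuopt::linear_programming::data_model_view_t<i_t, f_t>& view)
{
  cuopt::linear_programming::solver_settings_t<i_t, f_t> settings;
  auto solution = cuopt::linear_programming::solve_lp(
    nullptr, view, settings.get_pdlp_settings());
  assert(solution.get_error_status().get_error_type() ==
         cuopt::error_type_t::Success);
  assert(solution.get_objective_value() == 0.0);  // assumed accessor name
}
```
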
75 changes: 51 additions & 24 deletions cpp/cuopt_cli.cpp
@@ -1,13 +1,16 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

#include <cuopt/linear_programming/data_model_view.hpp>
#include <cuopt/linear_programming/mip/solver_settings.hpp>
#include <cuopt/linear_programming/optimization_problem.hpp>
#include <cuopt/linear_programming/optimization_problem_conversions.hpp>
#include <cuopt/linear_programming/solve.hpp>
#include <cuopt/linear_programming/utilities/remote_solve.hpp>
#include <mps_parser/parser.hpp>
#include <utilities/logger.hpp>

@@ -17,6 +20,7 @@
#include <rmm/mr/cuda_async_memory_resource.hpp>

#include <unistd.h>
#include <algorithm>
#include <argparse/argparse.hpp>
#include <iostream>
#include <stdexcept>
@@ -87,9 +91,12 @@ inline cuopt::init_logger_t dummy_logger(
int run_single_file(const std::string& file_path,
const std::string& initial_solution_file,
bool solve_relaxation,
const std::map<std::string, std::string>& settings_strings)
const std::map<std::string, std::string>& settings_strings,
bool is_remote_solve)
{
const raft::handle_t handle_{};
// Only create raft handle for local solve - it triggers CUDA initialization
std::unique_ptr<raft::handle_t> handle_ptr;
if (!is_remote_solve) { handle_ptr = std::make_unique<raft::handle_t>(); }
cuopt::linear_programming::solver_settings_t<int, double> settings;

try {
@@ -122,13 +129,15 @@ int run_single_file(const std::string& file_path,
return -1;
}

auto op_problem =
cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model);
// Create a non-owning view from the mps_data_model (using shared conversion function)
// solve_lp/solve_mip will handle remote vs local solve based on env vars
auto view = cuopt::linear_programming::create_view_from_mps_data_model(mps_data_model);

const bool is_mip =
(op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP ||
op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) &&
!solve_relaxation;
// Determine if this is a MIP problem by checking problem category
auto problem_category = view.get_problem_category();
bool has_integers = (problem_category == cuopt::linear_programming::problem_category_t::MIP) ||
(problem_category == cuopt::linear_programming::problem_category_t::IP);
const bool is_mip = has_integers && !solve_relaxation;

try {
auto initial_solution =
@@ -155,12 +164,24 @@
}

try {
// Pass handle_ptr.get() - can be nullptr for remote solve
if (is_mip) {
auto& mip_settings = settings.get_mip_settings();
auto solution = cuopt::linear_programming::solve_mip(op_problem, mip_settings);
auto solution = cuopt::linear_programming::solve_mip(handle_ptr.get(), view, mip_settings);
if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) {
auto log = dummy_logger(settings);
CUOPT_LOG_ERROR("MIP solve failed: %s", solution.get_error_status().what());
return -1;
}
} else {
auto& lp_settings = settings.get_pdlp_settings();
auto solution = cuopt::linear_programming::solve_lp(op_problem, lp_settings);
auto solution = cuopt::linear_programming::solve_lp(handle_ptr.get(), view, lp_settings);
if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) {
auto log = dummy_logger(settings);
CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what());
return -1;
}
// Note: Solution output is now handled by solve_lp/solve_lp_remote via CUOPT_LOG_INFO
}
} catch (const std::exception& e) {
CUOPT_LOG_ERROR("Error: %s", e.what());
@@ -331,22 +352,28 @@ int main(int argc, char* argv[])
// Get the values
std::string file_name = program.get<std::string>("filename");

const auto initial_solution_file = program.get<std::string>("--initial-solution");
const auto solve_relaxation = program.get<bool>("--relaxation");

// All arguments are parsed as string, default values are parsed as int if unused.
const auto num_gpus = program.is_used("--num-gpus")
? std::stoi(program.get<std::string>("--num-gpus"))
: program.get<int>("--num-gpus");
// Check for remote solve BEFORE any CUDA initialization
const bool is_remote_solve = cuopt::linear_programming::is_remote_solve_enabled();

std::vector<std::shared_ptr<rmm::mr::device_memory_resource>> memory_resources;

for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
cudaSetDevice(i);
memory_resources.push_back(make_async());
rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
if (!is_remote_solve) {
// Only initialize CUDA resources for local solve
// All arguments are parsed as string, default values are parsed as int if unused.
const auto num_gpus = program.is_used("--num-gpus")
? std::stoi(program.get<std::string>("--num-gpus"))
: program.get<int>("--num-gpus");

for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) {
cudaSetDevice(i);
memory_resources.push_back(make_async());
rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get());
}
cudaSetDevice(0);
}
cudaSetDevice(0);

return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings);
const auto initial_solution_file = program.get<std::string>("--initial-solution");
const auto solve_relaxation = program.get<bool>("--relaxation");
return run_single_file(
file_name, initial_solution_file, solve_relaxation, settings_strings, is_remote_solve);
}
58 changes: 58 additions & 0 deletions cpp/include/cuopt/linear_programming/data_model_view.hpp
@@ -0,0 +1,58 @@
/* clang-format off */
Contributor Author commented:
Instead of this alias, the data_model_view_t could be moved to a small common library shared by mps parser and libcuopt. Currently mps parser is also statically linked into libcuopt to avoid runtime path issues. A small common library would eliminate that too, but that could be done as a second pass.

/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */

#pragma once

/**
* @file data_model_view.hpp
* @brief Provides data_model_view_t in the cuopt::linear_programming namespace.
*
* This header provides access to the data_model_view_t class, a non-owning view
* over LP/MIP problem data. The view uses span<T> to hold pointers that can
* reference either host or device memory, making it suitable for both local
* GPU-based solves and remote CPU-based solves.
*
* The canonical implementation lives in cuopt::mps_parser for historical reasons
* and to maintain mps_parser as a standalone library. This header provides
* convenient aliases in the cuopt::linear_programming namespace.
*/

#include <mps_parser/data_model_view.hpp>
#include <mps_parser/utilities/span.hpp>

namespace cuopt::linear_programming {

/**
* @brief Non-owning span type that can point to either host or device memory.
*
* This is an alias to the span type defined in mps_parser. The span holds
* a pointer and size, but does not own the underlying memory.
*
* @tparam T Element type
*/
template <typename T>
using span = cuopt::mps_parser::span<T>;

/**
* @brief Non-owning view of LP/MIP problem data.
*
* This is an alias to the data_model_view_t defined in mps_parser.
* The view stores problem data (constraint matrix, bounds, objective, etc.)
* as span<T> members, which can point to either host or device memory.
*
* Key features for remote solve support:
* - Non-owning: does not allocate or free memory
* - Memory-agnostic: spans can point to host OR device memory
* - Serializable: host data can be directly serialized for remote solve
*
* @tparam i_t Integer type for indices (typically int)
* @tparam f_t Floating point type for values (typically float or double)
*/
template <typename i_t, typename f_t>
using data_model_view_t = cuopt::mps_parser::data_model_view_t<i_t, f_t>;

} // namespace cuopt::linear_programming
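
Since these are pure aliases, both spellings denote the same type, which can be verified at compile time:

```cpp
#include <cuopt/linear_programming/data_model_view.hpp>

#include <type_traits>

static_assert(std::is_same_v<cuopt::linear_programming::data_model_view_t<int, double>,
                             cuopt::mps_parser::data_model_view_t<int, double>>,
              "the alias and the canonical mps_parser type are identical");
```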