diff --git a/.gitignore b/.gitignore index 9edc9823c..c9d5349f7 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ docs/cuopt/build cpp/include/cuopt/semantic_version.hpp !datasets/quadratic_programming !datasets/quadratic_programming/** +dev_scripts/test_c_api diff --git a/REMOTE_SOLVE_MODES.md b/REMOTE_SOLVE_MODES.md new file mode 100644 index 000000000..d33b17b1b --- /dev/null +++ b/REMOTE_SOLVE_MODES.md @@ -0,0 +1,888 @@ +# cuOpt Remote Solve Modes and Protocols + +This document describes the different operating modes, log retrieval methods, and APIs available for cuOpt remote solving. + +--- + +## Table of Contents + +1. [Operating Modes](#1-operating-modes) +2. [Supported Interfaces](#2-supported-interfaces) +3. [Log Retrieval Methods](#3-log-retrieval-methods) +4. [API Endpoints](#4-api-endpoints) +5. [WAIT_FOR_RESULT API](#5-wait_for_result-api) +6. [Detection and Configuration](#6-detection-and-configuration) +7. [Job Status States](#7-job-status-states) +8. [Python APIs](#8-python-apis) +9. [Workflow Comparisons](#9-workflow-comparisons) +10. [Best Practices](#10-best-practices) + +--- + +## 1. 
Operating Modes + +### 1.1 Sync Mode (`CUOPT_REMOTE_USE_SYNC=1`) + +**Behavior**: Client sends request and **blocks** until completion + +**Architecture**: +- Job still goes through server queue and worker process +- Server uses condition variable to block connection until job completes +- Returns result directly when complete + +**Log Streaming**: Real-time log streaming during solve + +**Use Case**: Interactive/development use where you want immediate feedback + +**Workflow**: +``` +Client → SUBMIT_JOB (blocking=true) → Server blocks → Worker solves → Server returns result +``` + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + cuopt_cli --log-to-console 1 problem.mps +``` + +--- + +### 1.2 Async Mode (default) + +**Behavior**: Client submits job, receives `job_id` immediately, non-blocking + +**Architecture**: +- Client polls for status (QUEUED → PROCESSING → COMPLETED/FAILED) +- Client retrieves logs incrementally using byte offset +- Client retrieves result when ready +- Client deletes job to free server memory + +**Log Retrieval**: Incremental polling via `GET_LOGS` API + +**Use Case**: Batch processing, long-running jobs, multiple concurrent jobs + +**Workflow**: +``` +Client → SUBMIT_JOB → job_id + ↓ (poll) + → CHECK_STATUS (returns QUEUED/PROCESSING/COMPLETED/FAILED) + → GET_LOGS (frombyte offset, returns new log lines) + → GET_RESULT (when COMPLETED) + → DELETE_RESULT (cleanup) +``` + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ + cuopt_cli problem.mps +``` + +--- + +### 1.3 Hybrid Mode (Async Submit + WAIT_FOR_RESULT) + +**Behavior**: Submit async to get `job_id`, then block on wait + +**Architecture**: +- Non-blocking submission returns `job_id` immediately +- `WAIT_FOR_RESULT` API blocks until completion +- Can stream logs in parallel thread while waiting + +**Log Retrieval**: Parallel thread polls `GET_LOGS` while main thread waits + +**Use Case**: 
Interactive use with job management (cancellation, log streaming) + +**Workflow**: +``` +Client → SUBMIT_JOB (blocking=false) → job_id + ↓ +[Thread 1] WAIT_FOR_RESULT(job_id) [BLOCKS until complete] +[Thread 2] while running: GET_LOGS(job_id, frombyte) + ↓ +result returned automatically +DELETE_RESULT(job_id) +``` + +**Example**: See [Section 5.3](#53-python-usage-example) for Python code + +--- + +## 2. Supported Interfaces + +| Interface | Sync Mode | Async Mode | WAIT_FOR_RESULT | +|-----------|-----------|------------|-----------------| +| **C++ API** (`solve_lp`, `solve_mip`) | ✓ | ✓ | ✓ (internal) | +| **Python API** (`Solve()`, `Problem.solve()`) | ✓ | ✓ | ❌ (not wrapped) | +| **cuopt_cli** | ✓ | ✓ | ❌ (not exposed) | +| **C API** | ✓ | ✓ | ❌ (not exposed) | +| **Python `cancel_job()`** | - | ✓ | ✓ | +| **Low-level Protobuf** | ✓ | ✓ | ✓ | + +**Note**: All high-level interfaces are **transparent** - they automatically detect remote solve via environment variables and handle the full async polling loop internally. + +--- + +## 3. 
Log Retrieval Methods + +### 3.1 Sync Mode Logging + +**Method**: Real-time streaming + +**How it works**: +- Server captures stdout from worker process +- Streams log data to client in real-time over TCP connection +- Logs printed to console as solver runs + +**Control**: Set `log_to_console=1` in solver settings + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + cuopt_cli --log-to-console 1 problem.mps +``` + +**Output**: Logs appear immediately as solver runs + +--- + +### 3.2 Async Mode Logging + +**Method**: Incremental polling via `GET_LOGS` API + +**How it works**: +- Logs written to `/tmp/cuopt_logs/log_{job_id}` on server +- Client calls `GET_LOGS` with byte offset (`frombyte`) +- Server returns new log content from offset to current position +- Client updates offset for next poll + +**Implementation** (C++ client): +```cpp +int64_t log_frombyte = 0; +while (true) { + auto [job_exists, new_frombyte] = get_logs(host, port, job_id, log_frombyte); + if (job_exists) { + log_frombyte = new_frombyte; + } + // Check status... + sleep(0.1); +} +``` + +**Python example** (low-level): +```python +req = pb.AsyncRequest() +req.request_type = pb.GET_LOGS +req.job_id = job_id +req.frombyte = frombyte + +response = send_recv(req) +logs = response.logs_response +for line in logs.log_lines: + print(line) +frombyte = logs.nbytes # Update for next poll +``` + +--- + +### 3.3 WAIT_FOR_RESULT Logging + +**Method**: Parallel thread polling `GET_LOGS` while main thread waits + +**How it works**: +- Main thread blocks on `WAIT_FOR_RESULT` +- Separate thread continuously polls `GET_LOGS` +- Both threads access same job using `job_id` +- Log thread stops when main thread returns result + +**Advantages**: +- Real-time log visibility +- No need to manually poll status +- Clean blocking semantics with parallel logging + +See [Section 5.3](#53-python-usage-example) for complete code example. + +--- + +## 4. 
API Endpoints + +### 4.1 Protobuf Protocol + +All communication uses length-prefixed Protocol Buffer messages: + +``` +[8-byte size (uint64_t)][serialized protobuf data] +``` + +### 4.2 Request Types + +| Endpoint | Sync | Async | Hybrid | Returns | +|----------|------|-------|--------|---------| +| `SUBMIT_JOB` (blocking=false) | - | ✓ | ✓ | `job_id` | +| `SUBMIT_JOB` (blocking=true) | ✓ | - | - | Full result (blocks) | +| `CHECK_STATUS` | - | ✓ | ✓ | `JobStatus` enum | +| `GET_LOGS` | - | ✓ | ✓ | Log content + new offset | +| `GET_RESULT` | - | ✓ | ✓ | Serialized solution | +| `DELETE_RESULT` | - | ✓ | ✓ | Success status | +| `CANCEL_JOB` | - | ✓ | ✓ | Cancel status | +| `WAIT_FOR_RESULT` | - | - | ✓ | Serialized solution (blocks) | + +### 4.3 Protobuf Enum Definition + +File: `cpp/src/linear_programming/utilities/cuopt_remote.proto` + +```protobuf +enum AsyncRequestType { + SUBMIT_JOB = 0; // Submit a new job + CHECK_STATUS = 1; // Check job status + GET_RESULT = 2; // Retrieve completed result + DELETE_RESULT = 3; // Delete result from server + GET_LOGS = 4; // Retrieve buffered log entries + CANCEL_JOB = 5; // Cancel a queued or running job + WAIT_FOR_RESULT = 6; // Block until job completes, returns result +} +``` + +--- + +## 5. 
WAIT_FOR_RESULT API

### 5.1 Overview

`WAIT_FOR_RESULT` is a **hybrid async/blocking mode** that combines the benefits of both sync and async modes:

- **Submit async** → Get `job_id` back immediately (non-blocking submission)
- **Wait on result** → Block until job completes (no polling loop needed)
- **Stream logs in parallel** → Another thread can poll `GET_LOGS` while waiting

**This is the best of both worlds for interactive use!**

### 5.2 Server Implementation

File: `cpp/cuopt_remote_server.cpp`

#### Core Function (lines 1539-1602)

```cpp
bool wait_for_result(const std::string& job_id,
                     std::vector<char>& result_data,
                     std::string& error_message)
{
  // First check if job already completed
  {
    std::lock_guard<std::mutex> lock(tracker_mutex);
    auto it = job_tracker.find(job_id);

    // If already in terminal state, return immediately
    if (it->second.status == JobStatus::COMPLETED) {
      result_data = it->second.result_data;
      return true;
    } else if (it->second.status == JobStatus::FAILED) {
      error_message = it->second.error_message;
      return false;
    } else if (it->second.status == JobStatus::CANCELLED) {
      error_message = "Job was cancelled";
      return false;
    }
  }

  // Job is still running - create a waiter and wait on condition variable
  auto waiter = std::make_shared<JobWaiter>();
  {
    std::lock_guard<std::mutex> lock(waiters_mutex);
    waiting_threads[job_id] = waiter;
  }

  // Wait on the condition variable - this thread will sleep until signaled
  {
    std::unique_lock<std::mutex> lock(waiter->mutex);
    waiter->cv.wait(lock, [&waiter] { return waiter->ready; });
  }

  // Wakes up when result_retrieval_thread signals the CV
  if (waiter->success) {
    result_data = std::move(waiter->result_data);
    return true;
  } else {
    error_message = waiter->error_message;
    return false;
  }
}
```

#### Synchronization Mechanism

**JobWaiter struct**:
```cpp
struct JobWaiter {
  std::mutex mutex;
  std::condition_variable cv;
  bool ready = false;
  bool success = false;
  std::vector<char> result_data;
  std::string error_message;
};
```

**Signaling in result_retrieval_thread** (line 1186-1198):
```cpp
// Check if there's a blocking waiter
{
  std::lock_guard<std::mutex> lock(waiters_mutex);
  auto wit = waiting_threads.find(job_id);
  if (wit != waiting_threads.end()) {
    // Wake up the waiting thread
    auto waiter = wit->second;
    waiter->result_data = std::move(result_data);
    waiter->error_message = error_message;
    waiter->success = success;
    waiter->ready = true;
    waiter->cv.notify_one();  // <-- WAKE UP!
  }
}
```

### 5.3 Python Usage Example

File: `test_wait_with_logs.py`

```python
import threading
import time
import cuopt_remote_pb2 as pb

# 1. Submit job (async) to get job_id
req = pb.AsyncRequest()
req.request_type = pb.SUBMIT_JOB
req.blocking = False  # Get job_id back immediately

# Set up problem...
lp = req.lp_request
lp.problem.c.extend([1.0] * n_vars)
# ... more problem setup ...

response = send_recv(req)
job_id = response.submit_response.job_id
print(f"Job ID: {job_id}")

# 2. Start log streaming thread
def log_streaming_thread(job_id, stop_event):
    frombyte = 0
    while not stop_event.is_set():
        # Poll GET_LOGS
        req = pb.AsyncRequest()
        req.request_type = pb.GET_LOGS
        req.job_id = job_id
        req.frombyte = frombyte

        response = send_recv(req)
        logs_resp = response.logs_response

        if logs_resp.log_lines:
            for line in logs_resp.log_lines:
                print(f"[LOG] {line}")
            frombyte = logs_resp.nbytes

        time.sleep(0.05)  # Small delay to avoid hammering server

stop_event = threading.Event()
log_thread = threading.Thread(target=log_streaming_thread, args=(job_id, stop_event))
log_thread.start()

# 3. Main thread: Wait for result (BLOCKS)
print("Calling WAIT_FOR_RESULT (blocking)...")
req = pb.AsyncRequest()
req.request_type = pb.WAIT_FOR_RESULT
req.job_id = job_id

start = time.time()
response = send_recv(req, timeout=120)
elapsed = time.time() - start

# 4. 
Stop log thread +stop_event.set() +log_thread.join() + +# 5. Use result +result = response.result_response +if result.HasField('lp_solution'): + sol = result.lp_solution + print(f"Completed in {elapsed:.2f}s") + print(f"Objective: {sol.primal_objective:.6f}") +else: + print(f"Error: {result.error_message}") + +# 6. Cleanup +del_req = pb.AsyncRequest() +del_req.request_type = pb.DELETE_RESULT +del_req.job_id = job_id +send_recv(del_req) +``` + +### 5.4 Key Benefits + +| Feature | Sync Mode | Async Polling | **WAIT_FOR_RESULT** | +|---------|-----------|---------------|---------------------| +| Non-blocking submit | ❌ | ✓ | ✓ | +| Get job_id back | ❌ | ✓ | ✓ | +| Parallel log streaming | ❌ | ✓ | ✓ | +| No polling loop needed | ✓ | ❌ | ✓ | +| Connection efficiency | Poor | Good | Good | +| Can cancel job | ✓ | ✓ | ✓ | + +**WAIT_FOR_RESULT combines all the best features!** + +### 5.5 Important Notes + +1. **No auto-delete**: `WAIT_FOR_RESULT` does NOT automatically delete the job after returning the result. This allows you to: + - Retrieve remaining logs with `GET_LOGS` after completion + - Call `DELETE_RESULT` when you're done with logs + +2. **Connection held open**: The TCP connection remains open while waiting (can be many seconds/minutes for large problems) + +3. **Same result format**: The response is identical to `GET_RESULT` (serialized solution) + +4. **Thread-safe**: Multiple clients can wait on different jobs simultaneously + +5. **Not yet wrapped**: `WAIT_FOR_RESULT` is currently only available via low-level Protobuf protocol. High-level Python/C++ wrappers would need to be added (similar to existing `cancel_job()` wrapper). + +--- + +## 6. 
Detection and Configuration + +### 6.1 Transparent Remote Solve Detection + +The same code works for both local and remote: + +```python +# Works locally if no env vars set, remotely if set +solution = solve_lp(data_model, settings) +``` + +**Detection logic** (in all interfaces): +```cpp +bool is_remote = (getenv("CUOPT_REMOTE_HOST") && getenv("CUOPT_REMOTE_PORT")); +bool sync_mode = (getenv("CUOPT_REMOTE_USE_SYNC") == "1"); +``` + +### 6.2 Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `CUOPT_REMOTE_HOST` | Server hostname/IP | (none - local solve) | +| `CUOPT_REMOTE_PORT` | Server port | (none - local solve) | +| `CUOPT_REMOTE_USE_SYNC` | Use sync mode if "1" | "0" (async) | +| `CUOPT_SERIALIZER_LIB` | Path to custom serializer | (uses protobuf) | + +**Example**: +```bash +export CUOPT_REMOTE_HOST=gpu-server.example.com +export CUOPT_REMOTE_PORT=8765 +export CUOPT_REMOTE_USE_SYNC=1 # Optional: force sync mode +``` + +--- + +## 7. Job Status States + +``` +QUEUED (0) → Job waiting for available worker +PROCESSING (1) → Worker is solving the problem +COMPLETED (2) → Solve finished successfully +FAILED (3) → Solve failed with error +NOT_FOUND (4) → Job ID doesn't exist on server +CANCELLED (5) → Job was cancelled by user +``` + +**State Transitions**: +``` +QUEUED → PROCESSING → COMPLETED + → FAILED + → CANCELLED (via CANCEL_JOB) +``` + +--- + +## 8. 
Python APIs + +### 8.1 Cancel Job (Available Now) + +File: `python/cuopt/cuopt/linear_programming/remote.py` + +```python +from cuopt.linear_programming import cancel_job, JobStatus + +result = cancel_job("job_abc123", host="localhost", port=8765) +print(f"Success: {result.success}") +print(f"Status: {result.job_status}") # JobStatus enum +print(f"Message: {result.message}") +``` + +**Returns**: `CancelResult` dataclass +- `success: bool` - Whether cancellation succeeded +- `message: str` - Status message +- `job_status: JobStatus` - Final job status + +### 8.2 Future API (Not Yet Implemented) + +To match the `cancel_job()` pattern, these could be added: + +```python +# Submit async and get job_id +def submit_job_async(data_model, settings, host=None, port=None) -> str: + """Submit job and return job_id immediately.""" + pass + +# Block until complete +def wait_for_result(job_id: str, host=None, port=None) -> Solution: + """Wait for job completion and return result (blocks).""" + pass + +# Poll for status +def get_job_status(job_id: str, host=None, port=None) -> JobStatus: + """Check job status without blocking.""" + pass + +# Get logs +def get_job_logs(job_id: str, frombyte: int = 0, host=None, port=None) -> tuple[list[str], int]: + """Get logs from byte offset, returns (log_lines, new_offset).""" + pass + +# Cleanup +def delete_job(job_id: str, host=None, port=None) -> bool: + """Delete job from server.""" + pass +``` + +--- + +## 9. 
Workflow Comparisons + +### 9.1 Sync Mode Workflow + +``` +Client: + connect() + send(SUBMIT_JOB, blocking=true, problem_data) + [BLOCKS - connection stays open] + receive(solution) + [Logs streamed in real-time during blocking] + disconnect() +``` + +**Pros**: +- Simple - one request/response +- Real-time logs +- Blocking semantics + +**Cons**: +- Connection held open entire time +- Can't cancel easily +- No job_id for tracking + +--- + +### 9.2 Async Mode Workflow (Traditional Polling) + +``` +Client: + # Submit + job_id = submit_job(problem) + + # Poll until complete + while True: + status = check_status(job_id) + if status == COMPLETED: + break + if status == FAILED: + error() + + # Optionally get logs + if verbose: + logs = get_logs(job_id, frombyte) + print(logs) + frombyte = update_offset(logs) + + sleep(0.1) + + # Retrieve result + solution = get_result(job_id) + + # Cleanup + delete_job(job_id) +``` + +**Pros**: +- Non-blocking +- Can cancel anytime +- Job_id for tracking +- Multiple clients can monitor same job + +**Cons**: +- Polling loop complexity +- Delayed log visibility (polling interval) +- More network requests + +--- + +### 9.3 WAIT_FOR_RESULT Workflow (Best of Both) + +``` +Client: + # Submit async + job_id = submit_job(problem, blocking=false) + + # Start log thread + Thread 1: + while not done: + logs = get_logs(job_id, frombyte) + print(logs) + frombyte = update_offset(logs) + sleep(0.05) + + # Main thread: wait for result + Thread 2 (main): + solution = wait_for_result(job_id) # BLOCKS + + # Stop log thread + stop_log_thread() + + # Cleanup + delete_job(job_id) +``` + +**Pros**: +- Non-blocking submit (get job_id) +- Real-time logs via parallel thread +- Blocking wait (no polling loop) +- Can cancel from another client +- Clean separation of concerns + +**Cons**: +- Requires threading +- Slightly more complex than sync mode + +--- + +## 10. 
Best Practices + +### 10.1 Choosing a Mode + +**Use Sync Mode when**: +- Interactive development/debugging +- Single-shot solves +- Simplicity is priority +- Don't need job_id + +**Use Async Mode when**: +- Batch processing +- Long-running jobs +- Need to manage multiple jobs +- Want to cancel jobs +- Production systems + +**Use WAIT_FOR_RESULT when**: +- Interactive use +- Want real-time logs +- Need job_id for tracking/cancellation +- Don't want polling complexity + +### 10.2 Log Retrieval Best Practices + +**For Sync Mode**: +```bash +# Always enable log_to_console for sync mode +CUOPT_REMOTE_USE_SYNC=1 cuopt_cli --log-to-console 1 problem.mps +``` + +**For Async Mode**: +```python +# Poll logs frequently for near-real-time visibility +frombyte = 0 +while job_running: + logs = get_logs(job_id, frombyte) + frombyte = logs.nbytes + time.sleep(0.05) # 50ms polling interval +``` + +**For WAIT_FOR_RESULT**: +```python +# Use dedicated thread for logs +log_thread = threading.Thread(target=log_poller, args=(job_id,)) +log_thread.daemon = True # Exit when main thread exits +log_thread.start() + +result = wait_for_result(job_id) +``` + +### 10.3 Error Handling + +**Always check job status**: +```python +result = wait_for_result(job_id) +if result.status == pb.FAILED: + print(f"Error: {result.error_message}") + # Get final logs for debugging + logs = get_logs(job_id, 0) + print(logs) +``` + +**Handle worker crashes**: +```python +# Server automatically marks jobs as FAILED if worker dies +# Check status periodically +status = check_status(job_id) +if status == FAILED: + # Retrieve error message + result = get_result(job_id) # Contains error_message +``` + +### 10.4 Resource Cleanup + +**Always delete completed jobs**: +```python +try: + result = wait_for_result(job_id) + # ... use result ... 
+finally: + delete_job(job_id) # Free server memory +``` + +**For long-running servers**: +- Implement periodic cleanup of old completed jobs +- Monitor `/tmp/cuopt_logs` directory size +- Consider auto-deletion after N hours + +--- + +## Appendix: Architecture Diagrams + +### A.1 Sync Mode Flow + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=1) │ │ + │──────────────────────────▶│ │ + │ │ Add to job queue │ + │ │──────────────────────────▶│ + │ │ │ + │ (connection held │ │ + │ open, blocking) │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ │ Logs streamed to client │ + │◀─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ +``` + +### A.2 Async Mode Flow + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=0) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (QUEUED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ GET_LOGS (job_id, 0) │ │ + │──────────────────────────▶│ │ + │ Response (logs 0-1000) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (PROCESSING) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (COMPLETED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ DELETE_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (SUCCESS) │ │ + │◀──────────────────────────│ │ +``` + +### A.3 WAIT_FOR_RESULT Flow + +``` +Client 
Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=0) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + ├─[Thread 1]────────────────┤ │ + │ WAIT_FOR_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ Handler creates JobWaiter│ + │ (connection held │ Thread blocks on CV │ + │ open, no response) │ │ + │ │ │ + ├─[Thread 2]────────────────┤ │ + │ GET_LOGS (job_id, 0) │ │ + │──────────────────────────▶│ │ + │ Response (logs) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_LOGS (job_id, 1000) │ Execute GPU Solve │ + │──────────────────────────▶│◀─────────────────────────▶│ + │ Response (logs) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result thread signals CV │ + │ │ Handler wakes up │ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + └─[Thread 1 returns] │ │ + │ │ │ + └─[Thread 2 stops polling] │ │ +``` + +--- + +## References + +- **Implementation**: `cpp/cuopt_remote_server.cpp` +- **Client Logic**: `cpp/src/linear_programming/utilities/remote_solve.cu` +- **Protocol Definition**: `cpp/src/linear_programming/utilities/cuopt_remote.proto` +- **Python Wrappers**: `python/cuopt/cuopt/linear_programming/remote.py` +- **Architecture Doc**: `docs/remote_solve_architecture.md` +- **Developer Guide**: `docs/developer/REMOTE_SOLVE_GUIDE.md` +- **Test Example**: `test_wait_with_logs.py` diff --git a/build.sh b/build.sh index 1ee8e87fc..fd85e6731 100755 --- a/build.sh +++ b/build.sh @@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters 
--skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client cuopt_remote_server docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -25,6 +25,7 @@ HELP="$0 [ ...] [ ...] cuopt - build the cuopt Python package cuopt_server - build the cuopt_server Python package cuopt_sh_client - build cuopt self host client + cuopt_remote_server - build the cuopt remote solve server executable docs - build the docs deb - build deb package (requires libcuopt to be built first) and is: @@ -390,6 +391,30 @@ if buildAll || hasArg libcuopt; then fi fi +################################################################################ +# Build the cuopt remote solve server +if hasArg cuopt_remote_server; then + if [ ! -d "${LIBCUOPT_BUILD_DIR}" ]; then + echo "Error: libcuopt must be built before cuopt_remote_server. Run with 'libcuopt' target first." 
+ exit 1 + fi + cd "${LIBCUOPT_BUILD_DIR}" + + # Reconfigure with BUILD_REMOTE_SERVER=ON + cmake -DBUILD_REMOTE_SERVER=ON "${LIBCUOPT_BUILD_DIR}" + + # Build the server target + cmake --build "${LIBCUOPT_BUILD_DIR}" --target cuopt_remote_server ${VERBOSE_FLAG} -j"${PARALLEL_LEVEL}" + + # Install the server executable + if [ -z "${INSTALL_TARGET}" ]; then + echo "Skipping install of cuopt_remote_server (-n flag set)" + else + install -m 755 "${LIBCUOPT_BUILD_DIR}/cuopt_remote_server" "${INSTALL_PREFIX}/bin/" + echo "Installed cuopt_remote_server to ${INSTALL_PREFIX}/bin/" + fi +fi + ################################################################################ # Build deb package if hasArg deb; then diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 640562ed3..dec5d1c9e 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -17,6 +17,9 @@ fi # Install Boost and TBB bash ci/utils/install_boost_tbb.sh +# Install Protobuf +bash ci/utils/install_protobuf.sh + export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON" # For pull requests we are enabling assert mode. diff --git a/ci/utils/install_protobuf.sh b/ci/utils/install_protobuf.sh new file mode 100755 index 000000000..c17a3bb01 --- /dev/null +++ b/ci/utils/install_protobuf.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Install Protobuf development libraries +if [ -f /etc/os-release ]; then + . /etc/os-release + if [[ "$ID" == "rocky" ]]; then + echo "Detected Rocky Linux. Installing Protobuf via dnf..." 
+ # Enable PowerTools (Rocky 8) or CRB (Rocky 9) repository for protobuf-devel + if [[ "${VERSION_ID%%.*}" == "8" ]]; then + dnf config-manager --set-enabled powertools || dnf config-manager --set-enabled PowerTools || true + elif [[ "${VERSION_ID%%.*}" == "9" ]]; then + dnf config-manager --set-enabled crb || true + fi + dnf install -y protobuf-devel protobuf-compiler + elif [[ "$ID" == "ubuntu" ]]; then + echo "Detected Ubuntu. Installing Protobuf via apt..." + apt-get update + apt-get install -y libprotobuf-dev protobuf-compiler + else + echo "Unknown OS: $ID. Please install Protobuf development libraries manually." + exit 1 + fi +else + echo "/etc/os-release not found. Cannot determine OS. Please install Protobuf development libraries manually." + exit 1 +fi diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 0594f8106..8a362516b 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 55b3d3dfb..248c74ce9 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml.orig b/conda/environments/all_cuda-130_arch-x86_64.yaml.orig new file mode 100644 index 000000000..68ac02cdd --- /dev/null +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml.orig @@ -0,0 +1,82 @@ +# This file is generated by `rapids-dependency-file-generator`. 
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai-nightly +- rapidsai +- conda-forge +dependencies: +- boost +- breathe +- bzip2 +- c-compiler +- ccache +- clang-tools=20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cpp-argparse +- cuda-nvcc +- cuda-nvtx-dev +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cudf==26.2.*,>=0.0.0a0 +- cupy>=13.6.0 +- cxx-compiler +- cython>=3.0.3 +- doxygen=1.9.1 +- exhale +- fastapi +- gcc_linux-64=14.* +- gmock +- gtest +- ipython +- jsonref==1.1.0 +- libcudss-dev >=0.7 +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libraft-headers==26.2.*,>=0.0.0a0 +- librmm==26.2.*,>=0.0.0a0 +- make +- msgpack-numpy==0.4.8 +- msgpack-python==1.1.0 +- myst-nb +- myst-parser +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 +- numpy>=1.23.5,<3.0a0 +- numpydoc +- pandas>=2.0 +- pexpect +- pip +- pre-commit +- psutil>=6.0.0 +- pylibraft==26.2.*,>=0.0.0a0 +- pyrsistent +- pytest-cov +- pytest<8 +- python>=3.10,<3.14 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==26.2.*,>=0.0.0a0 +- rapids-logger==0.2.*,>=0.0.0a0 +- requests +- rmm==26.2.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 +- sphinx +- sphinx-copybutton +- sphinx-design +- sphinx-markdown-tables +- sphinx_rtd_theme +- sphinxcontrib-openapi +- sphinxcontrib-websupport +- sysroot_linux-64==2.28 +- tbb-devel +- uvicorn==0.34.* +- zlib +- pip: + - nvidia_sphinx_theme + - swagger-plugin-for-sphinx + - veroviz +name: all_cuda-130_arch-x86_64 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index da9337c83..9475b5109 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make 
diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index c2e8d7dbc..bc99d55f9 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index 8e56c6934..f9e0761a7 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -62,6 +62,7 @@ cache: - tbb-devel - zlib - bzip2 + - libprotobuf host: - cpp-argparse - cuda-version =${{ cuda_version }} @@ -76,6 +77,7 @@ cache: - tbb-devel - zlib - bzip2 + - libprotobuf outputs: - package: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fac00c66a..a06ed705d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -139,6 +139,14 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") endif() list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror --default-stream=per-thread) +# GCC (especially newer versions) can emit false-positive -Warray-bounds diagnostics from +# 3rd-party headers (e.g., Protobuf) that get promoted to hard errors via -Werror above. +# Keep -Werror for our code but do not fail the build on this specific diagnostic. +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wno-error=array-bounds -Xcompiler=-Wno-array-bounds) + # Protobuf headers can also trigger GCC -Wstringop-overread false-positives under heavy inlining. 
+ list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wno-error=stringop-overread -Xcompiler=-Wno-stringop-overread) +endif() if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall) else() @@ -225,6 +233,27 @@ create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) find_package(CUDSS REQUIRED) +# Protocol Buffers for remote solve serialization +find_package(Protobuf REQUIRED) +include_directories(SYSTEM ${Protobuf_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Generate C++ code from .proto file +set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities/cuopt_remote.proto") +set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc") +set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h") + +add_custom_command( + OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} + ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} + --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities + ${PROTO_FILE} + DEPENDS ${PROTO_FILE} + COMMENT "Generating C++ code from cuopt_remote.proto" + VERBATIM +) + if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() @@ -236,6 +265,7 @@ if (HOST_LINEINFO) endif() add_library(cuopt SHARED ${CUOPT_SRC_FILES} + ${PROTO_SRCS} ) set_target_properties(cuopt @@ -341,10 +371,11 @@ target_link_libraries(cuopt rapids_logger::rapids_logger CCCL::CCCL raft::raft - cuopt::mps_parser ${CUDSS_LIB_FILE} PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} + cuopt::mps_parser_static # Static link - symbols embedded in libcuopt.so + protobuf::libprotobuf ) @@ -525,6 +556,74 @@ if(BUILD_LP_BENCHMARKS) endif() endif() +# ################################################################################################## +# - cuOpt Remote Server --------------------------------------------------------------------------- +option(BUILD_REMOTE_SERVER "Build cuOpt remote solve server" OFF) 
+if(BUILD_REMOTE_SERVER AND NOT BUILD_LP_ONLY) + add_executable(cuopt_remote_server cuopt_remote_server.cpp) + target_compile_options(cuopt_remote_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(cuopt_remote_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}" + PUBLIC + "$" + "$" + ) + target_link_libraries(cuopt_remote_server + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + protobuf::libprotobuf + ) + # Use RUNPATH so LD_LIBRARY_PATH can override conda paths during development + set_target_properties(cuopt_remote_server PROPERTIES + SKIP_BUILD_RPATH OFF + BUILD_WITH_INSTALL_RPATH OFF + INSTALL_RPATH "$ORIGIN/../${lib_dir}" + ) + # Enable RUNPATH (new dtags) so LD_LIBRARY_PATH takes precedence + target_link_options(cuopt_remote_server PRIVATE -Wl,--enable-new-dtags) + + # Install the server executable + install(TARGETS cuopt_remote_server + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) + + # cuopt_cancel_job - standalone utility to cancel jobs + add_executable(cuopt_cancel_job cuopt_cancel_job.cpp) + target_compile_options(cuopt_cancel_job + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(cuopt_cancel_job + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}/include" + ) + target_link_libraries(cuopt_cancel_job + PUBLIC + cuopt + ) + target_link_options(cuopt_cancel_job PRIVATE -Wl,--enable-new-dtags) + + install(TARGETS cuopt_cancel_job + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) +endif() + +# ################################################################################################## +# - Pluggable Serializers -------------------------------------------------------------------------- +# Build optional serializer plugins (e.g., msgpack) 
+add_subdirectory(src/linear_programming/utilities/serializers) # ################################################################################################## # - CPack has to be the last item in the cmake file------------------------------------------------- diff --git a/cpp/cuopt_cancel_job.cpp b/cpp/cuopt_cancel_job.cpp new file mode 100644 index 000000000..229302d17 --- /dev/null +++ b/cpp/cuopt_cancel_job.cpp @@ -0,0 +1,106 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file cuopt_cancel_job.cpp + * @brief Standalone utility to cancel jobs on a cuopt_remote_server + * + * Usage: + * cuopt_cancel_job [-h host] [-p port] + * + * Examples: + * cuopt_cancel_job job_1234567890abcdef + * cuopt_cancel_job job_1234567890abcdef -h 192.168.1.100 -p 9090 + */ + +#include + +#include +#include +#include +#include + +using namespace cuopt::linear_programming; + +void print_usage(const char* prog) +{ + std::cout << "Usage: " << prog << " [options]\n" + << "\n" + << "Cancel a job on a cuopt_remote_server.\n" + << "\n" + << "Arguments:\n" + << " job_id The job ID to cancel\n" + << "\n" + << "Options:\n" + << " -h HOST Server hostname (default: localhost)\n" + << " -p PORT Server port (default: 9090)\n" + << " --help Show this help message\n" + << "\n" + << "Environment Variables:\n" + << " CUOPT_REMOTE_HOST Default server host\n" + << " CUOPT_REMOTE_PORT Default server port\n" + << "\n" + << "Examples:\n" + << " " << prog << " job_1234567890abcdef\n" + << " " << prog << " job_1234567890abcdef -h 192.168.1.100 -p 9090\n"; +} + +const char* status_to_string(remote_job_status_t status) +{ + switch (status) { + case remote_job_status_t::QUEUED: return "QUEUED"; + case remote_job_status_t::PROCESSING: return "PROCESSING"; + case remote_job_status_t::COMPLETED: return "COMPLETED"; + case remote_job_status_t::FAILED: return "FAILED"; + case 
remote_job_status_t::NOT_FOUND: return "NOT_FOUND"; + case remote_job_status_t::CANCELLED: return "CANCELLED"; + default: return "UNKNOWN"; + } +} + +int main(int argc, char** argv) +{ + // Parse arguments + std::string job_id; + std::string host = "localhost"; + int port = 9090; + + // Check environment variables first + const char* env_host = std::getenv("CUOPT_REMOTE_HOST"); + const char* env_port = std::getenv("CUOPT_REMOTE_PORT"); + if (env_host && env_host[0]) { host = env_host; } + if (env_port && env_port[0]) { port = std::atoi(env_port); } + + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--help") == 0) { + print_usage(argv[0]); + return 0; + } else if (strcmp(argv[i], "-h") == 0 && i + 1 < argc) { + host = argv[++i]; + } else if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) { + port = std::stoi(argv[++i]); + } else if (argv[i][0] != '-') { + job_id = argv[i]; + } + } + + if (job_id.empty()) { + std::cerr << "Error: No job_id specified\n\n"; + print_usage(argv[0]); + return 1; + } + + // Cancel the job using the remote solve API + remote_solve_config_t config{host, port}; + auto result = cancel_job_remote(config, job_id); + + // Print result + std::cout << "Job ID: " << job_id << "\n"; + std::cout << "Result: " << (result.success ? "SUCCESS" : "FAILED") << "\n"; + std::cout << "Message: " << result.message << "\n"; + std::cout << "Job Status: " << status_to_string(result.job_status) << "\n"; + + return result.success ? 0 : 1; +} diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 5023cefc6..4ff8c0fd8 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -1,23 +1,26 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ +#include #include #include #include +#include #include #include +// CUDA headers - only included for local solve path #include #include - #include #include #include +#include #include #include #include @@ -66,6 +69,108 @@ static char cuda_module_loading_env[] = "CUDA_MODULE_LOADING=EAGER"; */ inline auto make_async() { return std::make_shared(); } +/** + * @brief Create a data_model_view_t from mps_data_model_t + * + * This creates a non-owning view with spans pointing to the CPU data in the mps_data_model. + * Used for remote solve where data stays in CPU memory. + * + * @param mps_data_model The owning mps_data_model_t + * @return data_model_view_t with spans pointing to the mps_data_model's vectors + */ +template +cuopt::linear_programming::data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + cuopt::linear_programming::data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + 
view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + 
mps_data_model.get_initial_dual_solution().size()); + } + + if (mps_data_model.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(mps_data_model.get_quadratic_objective_values().data(), + mps_data_model.get_quadratic_objective_values().size(), + mps_data_model.get_quadratic_objective_indices().data(), + mps_data_model.get_quadratic_objective_indices().size(), + mps_data_model.get_quadratic_objective_offsets().data(), + mps_data_model.get_quadratic_objective_offsets().size()); + } + + return view; +} + /** * @brief Handle logger when error happens before logger is initialized * @param settings Solver settings @@ -83,13 +188,18 @@ inline cuopt::init_logger_t dummy_logger( * @param file_path Path to the MPS format input file containing the optimization problem * @param initial_solution_file Path to initial solution file in SOL format * @param settings_strings Map of solver parameters + * @param is_remote_solve Whether remote solve is enabled (skips CUDA handle creation) */ int run_single_file(const std::string& file_path, const std::string& initial_solution_file, bool solve_relaxation, - const std::map& settings_strings) + const std::map& settings_strings, + bool is_remote_solve) { - const raft::handle_t handle_{}; + // Only create raft handle for local solve - it triggers CUDA initialization + std::unique_ptr handle_ptr; + if (!is_remote_solve) { handle_ptr = std::make_unique(); } + cuopt::linear_programming::solver_settings_t settings; try { @@ -122,13 +232,15 @@ int run_single_file(const std::string& file_path, return -1; } - auto op_problem = - cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model); - - const bool is_mip = - (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP || - op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) && - !solve_relaxation; + // Determine if this is a MIP problem by checking variable types + bool has_integers 
= false; + for (const auto& vt : mps_data_model.get_variable_types()) { + if (vt == 'I' || vt == 'B') { + has_integers = true; + break; + } + } + const bool is_mip = has_integers && !solve_relaxation; try { auto initial_solution = @@ -154,13 +266,27 @@ int run_single_file(const std::string& file_path, return -1; } + // Create a non-owning view from the mps_data_model + // solve_lp/solve_mip will handle remote vs local solve based on env vars + auto view = create_view_from_mps_data_model(mps_data_model); + try { + // Pass handle_ptr.get() - can be nullptr for remote solve if (is_mip) { auto& mip_settings = settings.get_mip_settings(); - auto solution = cuopt::linear_programming::solve_mip(op_problem, mip_settings); + auto solution = cuopt::linear_programming::solve_mip(handle_ptr.get(), view, mip_settings); + if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + CUOPT_LOG_ERROR("MIP solve failed: %s", solution.get_error_status().what()); + return -1; + } } else { auto& lp_settings = settings.get_pdlp_settings(); - auto solution = cuopt::linear_programming::solve_lp(op_problem, lp_settings); + auto solution = cuopt::linear_programming::solve_lp(handle_ptr.get(), view, lp_settings); + if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what()); + return -1; + } + // Note: Solution output is now handled by solve_lp/solve_lp_remote via CUOPT_LOG_INFO } } catch (const std::exception& e) { CUOPT_LOG_ERROR("Error: %s", e.what()); @@ -334,19 +460,26 @@ int main(int argc, char* argv[]) const auto initial_solution_file = program.get("--initial-solution"); const auto solve_relaxation = program.get("--relaxation"); - // All arguments are parsed as string, default values are parsed as int if unused. - const auto num_gpus = program.is_used("--num-gpus") - ? 
std::stoi(program.get("--num-gpus")) - : program.get("--num-gpus"); + // Check for remote solve BEFORE any CUDA initialization + const bool is_remote_solve = cuopt::linear_programming::is_remote_solve_enabled(); std::vector> memory_resources; - for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { - cudaSetDevice(i); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + if (!is_remote_solve) { + // Only initialize CUDA resources for local solve + // All arguments are parsed as string, default values are parsed as int if unused. + const auto num_gpus = program.is_used("--num-gpus") + ? std::stoi(program.get("--num-gpus")) + : program.get("--num-gpus"); + + for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { + cudaSetDevice(i); + memory_resources.push_back(make_async()); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + } + cudaSetDevice(0); } - cudaSetDevice(0); - return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings); + return run_single_file( + file_name, initial_solution_file, solve_relaxation, settings_strings, is_remote_solve); } diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp new file mode 100644 index 000000000..5bab0bbd4 --- /dev/null +++ b/cpp/cuopt_remote_server.cpp @@ -0,0 +1,2301 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file cuopt_remote_server.cpp + * @brief Remote solve server with sync and async support using pluggable serialization + * + * Features: + * - Sync mode: Submit job with blocking=true, wait for result, return immediately + * - Async mode: Submit job, get job_id, poll for status, retrieve result, delete + * - Uses pluggable serialization (default: Protocol Buffers) + * - Worker processes with shared memory job queues + * - Real-time log streaming to client (sync mode only) + * + * Async workflow: + * 1. Client sends SUBMIT_JOB request → Server returns job_id + * 2. Client sends CHECK_STATUS request → Server returns job status + * 3. Client sends GET_RESULT request → Server returns solution + * 4. Client sends DELETE_RESULT request → Server cleans up job + * + * Sync workflow: + * 1. Client sends SUBMIT_JOB with blocking=true → Server solves and returns result directly + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace cuopt::linear_programming; + +// ============================================================================ +// Shared Memory Structures (must match between main process and workers) +// ============================================================================ + +constexpr size_t MAX_JOBS = 100; +constexpr size_t MAX_RESULTS = 100; + +// Job queue entry - small fixed size, data stored in separate per-job shared memory or sent via +// pipe +struct JobQueueEntry { + char job_id[64]; + uint32_t problem_type; // 0 = LP, 1 = MIP + uint64_t data_size; // Size of problem data (uint64 for large problems) + char shm_data_name[128]; // Name of per-job shared memory segment (shm mode only) + std::atomic ready; // Job is ready to be processed + std::atomic claimed; // 
Worker has claimed this job + std::atomic worker_pid; // PID of worker that claimed this job (0 if none) + std::atomic cancelled; // Job has been cancelled (worker should skip) + // Pipe mode fields + std::atomic worker_index; // Index of worker that claimed this job (-1 if none) + std::atomic data_sent; // Server has sent data to worker's pipe (pipe mode) +}; + +// Result queue entry - small fixed size, data stored in separate per-result shared memory or pipe +struct ResultQueueEntry { + char job_id[64]; + uint32_t status; // 0 = success, 1 = error, 2 = cancelled + uint64_t data_size; // Size of result data (uint64 for large results) + char shm_data_name[128]; // Name of per-result shared memory segment (shm mode only) + char error_message[1024]; + std::atomic ready; // Result is ready + std::atomic retrieved; // Result has been retrieved + std::atomic worker_index; // Index of worker that produced this result (pipe mode) +}; + +// Shared memory control block +struct SharedMemoryControl { + std::atomic shutdown_requested; + std::atomic active_workers; +}; + +// ============================================================================ +// Message types for streaming protocol +// ============================================================================ + +enum class MessageType : uint8_t { + LOG_MESSAGE = 0, // Log output from server + SOLUTION = 1, // Final solution data +}; + +// Helper to send a framed message with type +static bool send_typed_message(int sockfd, MessageType type, const void* data, size_t size) +{ + uint8_t msg_type = static_cast(type); + uint64_t payload_size = static_cast(size); + + if (::write(sockfd, &msg_type, 1) != 1) return false; + if (::write(sockfd, &payload_size, sizeof(payload_size)) != sizeof(payload_size)) return false; + if (size > 0) { + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) return false; + ptr += 
written; + remaining -= written; + } + } + return true; +} + +// ============================================================================ +// RAII stdout streamer for log streaming to client +// ============================================================================ + +class stdout_streamer_t { + public: + stdout_streamer_t(int client_fd, bool enabled) + : client_fd_(client_fd), enabled_(enabled), running_(false), original_stdout_(-1) + { + if (!enabled_) return; + + fflush(stdout); + + if (pipe(pipe_fds_) < 0) { + std::cerr << "[Server] Failed to create pipe for stdout streaming\n"; + enabled_ = false; + return; + } + + original_stdout_ = dup(STDOUT_FILENO); + if (original_stdout_ < 0) { + close(pipe_fds_[0]); + close(pipe_fds_[1]); + enabled_ = false; + return; + } + + if (dup2(pipe_fds_[1], STDOUT_FILENO) < 0) { + close(original_stdout_); + close(pipe_fds_[0]); + close(pipe_fds_[1]); + enabled_ = false; + return; + } + + close(pipe_fds_[1]); + + running_ = true; + reader_thread_ = std::thread(&stdout_streamer_t::reader_loop, this); + } + + ~stdout_streamer_t() + { + if (!enabled_) return; + + fflush(stdout); + dup2(original_stdout_, STDOUT_FILENO); + close(original_stdout_); + + running_ = false; + close(pipe_fds_[0]); + + if (reader_thread_.joinable()) { reader_thread_.join(); } + } + + private: + void reader_loop() + { + char buffer[4096]; + while (running_) { + ssize_t n = read(pipe_fds_[0], buffer, sizeof(buffer) - 1); + if (n <= 0) break; + + buffer[n] = '\0'; + + if (original_stdout_ >= 0) { write(original_stdout_, buffer, n); } + send_typed_message(client_fd_, MessageType::LOG_MESSAGE, buffer, n); + } + } + + int client_fd_; + bool enabled_; + std::atomic running_; + int original_stdout_; + int pipe_fds_[2]; + std::thread reader_thread_; +}; + +// ============================================================================ +// Job status tracking (main process only) +// ============================================================================ 
+ +enum class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND, CANCELLED }; + +struct JobInfo { + std::string job_id; + JobStatus status; + std::chrono::steady_clock::time_point submit_time; + std::vector result_data; + bool is_mip; + std::string error_message; + bool is_blocking; // True if a client is waiting synchronously +}; + +// Per-job condition variable for synchronous waiting +struct JobWaiter { + std::mutex mutex; + std::condition_variable cv; + std::vector result_data; + std::string error_message; + bool success; + bool ready; + + JobWaiter() : success(false), ready(false) {} +}; + +// ============================================================================ +// Global state +// ============================================================================ + +std::atomic keep_running{true}; +std::map job_tracker; +std::mutex tracker_mutex; +std::condition_variable result_cv; // Notified when results arrive + +std::map> waiting_threads; +std::mutex waiters_mutex; + +// Shared memory +JobQueueEntry* job_queue = nullptr; +ResultQueueEntry* result_queue = nullptr; +SharedMemoryControl* shm_ctrl = nullptr; + +// Worker PIDs +std::vector worker_pids; + +// Server configuration +struct ServerConfig { + int port = 9090; + int num_workers = 1; + bool verbose = true; + bool stream_logs = true; + bool use_pipes = true; // Default to pipes (container-friendly), --use-shm to disable +}; + +ServerConfig config; + +// Worker state for pipe-based IPC +struct WorkerPipes { + int to_worker_fd; // Server writes job data to this (pipe write end) + int from_worker_fd; // Server reads results from this (pipe read end) + int worker_read_fd; // Worker reads job data from this (inherited, closed in parent) + int worker_write_fd; // Worker writes results to this (inherited, closed in parent) +}; + +std::vector worker_pipes; + +// Pending job data for pipe mode (job_id -> serialized data) +std::mutex pending_data_mutex; +std::map> pending_job_data; + +// Shared 
memory names +const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; +const char* SHM_RESULT_QUEUE = "/cuopt_result_queue"; +const char* SHM_CONTROL = "/cuopt_control"; + +// ============================================================================ +// Signal handling +// ============================================================================ + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "\n[Server] Received shutdown signal\n"; + keep_running = false; + if (shm_ctrl) { shm_ctrl->shutdown_requested = true; } + result_cv.notify_all(); + } +} + +// ============================================================================ +// Utilities +// ============================================================================ + +std::string generate_job_id() +{ + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution dis; + + uint64_t id = dis(gen); + char buf[32]; + snprintf(buf, sizeof(buf), "job_%016lx", id); + return std::string(buf); +} + +static bool write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) return false; + ptr += written; + remaining -= written; + } + return true; +} + +static bool read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) return false; + ptr += nread; + remaining -= nread; + } + return true; +} + +static bool send_solution_message(int sockfd, const std::vector& data) +{ + return send_typed_message(sockfd, MessageType::SOLUTION, data.data(), data.size()); +} + +static bool receive_request(int sockfd, std::vector& data) +{ + uint64_t size; + if (!read_all(sockfd, &size, sizeof(size))) return false; + + // Sanity check - reject 
requests larger than 16GB + if (size > 16ULL * 1024 * 1024 * 1024) { + std::cerr << "[Server] Request too large: " << size << " bytes\n"; + return false; + } + + data.resize(size); + if (!read_all(sockfd, data.data(), size)) return false; + return true; +} + +// ============================================================================ +// Per-job Shared Memory Helpers (forward declarations) +// ============================================================================ + +static std::string create_job_shm(const std::string& job_id, + const std::vector& data, + const char* prefix); +static bool read_job_shm(const char* shm_name, size_t data_size, std::vector& data); +static std::string write_result_shm(const std::string& job_id, const std::vector& data); +static void cleanup_job_shm(const char* shm_name); + +// ============================================================================ +// Shared Memory Management +// ============================================================================ + +bool init_shared_memory() +{ + // Create job queue shared memory + int fd_jobs = shm_open(SHM_JOB_QUEUE, O_CREAT | O_RDWR, 0666); + if (fd_jobs < 0) { + std::cerr << "[Server] Failed to create job queue shared memory\n"; + return false; + } + size_t job_queue_size = sizeof(JobQueueEntry) * MAX_JOBS; + if (ftruncate(fd_jobs, job_queue_size) < 0) { + std::cerr << "[Server] Failed to size job queue shared memory\n"; + close(fd_jobs); + return false; + } + job_queue = static_cast( + mmap(nullptr, job_queue_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_jobs, 0)); + close(fd_jobs); + if (job_queue == MAP_FAILED) { + std::cerr << "[Server] Failed to map job queue\n"; + return false; + } + + // Initialize job queue entries + for (size_t i = 0; i < MAX_JOBS; ++i) { + job_queue[i].ready = false; + job_queue[i].claimed = false; + job_queue[i].worker_pid = 0; + job_queue[i].cancelled = false; + job_queue[i].worker_index = -1; + job_queue[i].data_sent = false; + } + + // Create 
result queue shared memory + int fd_results = shm_open(SHM_RESULT_QUEUE, O_CREAT | O_RDWR, 0666); + if (fd_results < 0) { + std::cerr << "[Server] Failed to create result queue shared memory\n"; + return false; + } + size_t result_queue_size = sizeof(ResultQueueEntry) * MAX_RESULTS; + if (ftruncate(fd_results, result_queue_size) < 0) { + std::cerr << "[Server] Failed to size result queue shared memory\n"; + close(fd_results); + return false; + } + result_queue = static_cast( + mmap(nullptr, result_queue_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_results, 0)); + close(fd_results); + if (result_queue == MAP_FAILED) { + std::cerr << "[Server] Failed to map result queue\n"; + return false; + } + + // Initialize result queue entries + for (size_t i = 0; i < MAX_RESULTS; ++i) { + result_queue[i].ready = false; + result_queue[i].retrieved = false; + result_queue[i].worker_index = -1; + } + + // Create control shared memory + int fd_ctrl = shm_open(SHM_CONTROL, O_CREAT | O_RDWR, 0666); + if (fd_ctrl < 0) { + std::cerr << "[Server] Failed to create control shared memory\n"; + return false; + } + if (ftruncate(fd_ctrl, sizeof(SharedMemoryControl)) < 0) { + std::cerr << "[Server] Failed to size control shared memory\n"; + close(fd_ctrl); + return false; + } + shm_ctrl = static_cast( + mmap(nullptr, sizeof(SharedMemoryControl), PROT_READ | PROT_WRITE, MAP_SHARED, fd_ctrl, 0)); + close(fd_ctrl); + if (shm_ctrl == MAP_FAILED) { + std::cerr << "[Server] Failed to map control\n"; + return false; + } + + shm_ctrl->shutdown_requested = false; + shm_ctrl->active_workers = 0; + + return true; +} + +void cleanup_shared_memory() +{ + if (job_queue) { + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + shm_unlink(SHM_JOB_QUEUE); + } + if (result_queue) { + munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); + shm_unlink(SHM_RESULT_QUEUE); + } + if (shm_ctrl) { + munmap(shm_ctrl, sizeof(SharedMemoryControl)); + shm_unlink(SHM_CONTROL); + } +} + +// 
============================================================================ +// Forward declarations for log file management +// ============================================================================ +std::string get_log_file_path(const std::string& job_id); +void ensure_log_dir_exists(); +void delete_log_file(const std::string& job_id); + +// ============================================================================ +// Forward declarations for pipe I/O helpers +// ============================================================================ +static bool write_to_pipe(int fd, const void* data, size_t size); +static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms = 120000); +static bool send_job_data_pipe(int worker_idx, const std::vector& data); +static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data); +static bool send_result_pipe(int fd, const std::vector& data); +static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector& data); + +// ============================================================================ +// Worker Process +// ============================================================================ + +void worker_process(int worker_id) +{ + std::cout << "[Worker " << worker_id << "] Started (PID: " << getpid() << ")\n"; + + // Increment active worker count + shm_ctrl->active_workers++; + + // NOTE: We create raft::handle_t AFTER stdout redirect (per-job) so that + // CUDA logging uses the redirected output streams. 
+ + // Get serializer + auto serializer = get_serializer(); + + while (!shm_ctrl->shutdown_requested) { + // Find a job to process + int job_slot = -1; + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && !job_queue[i].claimed) { + // Try to claim this job atomically + bool expected = false; + if (job_queue[i].claimed.compare_exchange_strong(expected, true)) { + job_queue[i].worker_pid = getpid(); // Record our PID + job_queue[i].worker_index = worker_id; // Record worker index for pipe mode + job_slot = i; + break; + } + } + } + + if (job_slot < 0) { + // No job available, sleep briefly + usleep(10000); // 10ms + continue; + } + + // Process the job + JobQueueEntry& job = job_queue[job_slot]; + std::string job_id(job.job_id); + bool is_mip = (job.problem_type == 1); + + // Check if job was cancelled before we start processing + if (job.cancelled) { + std::cout << "[Worker " << worker_id << "] Job cancelled before processing: " << job_id + << "\n"; + std::cout.flush(); + + // Cleanup job input shm (shm mode only) + if (!config.use_pipes) { cleanup_job_shm(job.shm_data_name); } + + // Store cancelled result in result queue + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + strncpy(result_queue[i].job_id, job_id.c_str(), sizeof(result_queue[i].job_id) - 1); + result_queue[i].status = 2; // Cancelled status + result_queue[i].data_size = 0; + result_queue[i].shm_data_name[0] = '\0'; + result_queue[i].worker_index = worker_id; // For pipe mode + strncpy(result_queue[i].error_message, + "Job was cancelled", + sizeof(result_queue[i].error_message) - 1); + result_queue[i].retrieved = false; + result_queue[i].ready = true; + break; + } + } + + // Clear job slot (don't exit/restart worker) + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; + job.cancelled = false; + continue; // Go back to waiting for next job + } + + std::cout << "[Worker " << worker_id << "] 
Processing job: " << job_id + << " (type: " << (is_mip ? "MIP" : "LP") << ")\n"; + std::cout.flush(); + + // Redirect stdout AND stderr to per-job log file for client log retrieval + // (Solver may use either stream for output) + std::string log_file = get_log_file_path(job_id); + int saved_stdout = dup(STDOUT_FILENO); + int saved_stderr = dup(STDERR_FILENO); + int log_fd = open(log_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (log_fd >= 0) { + // Flush C++ streams before changing fd + std::cout.flush(); + std::cerr.flush(); + fflush(stdout); + fflush(stderr); + + dup2(log_fd, STDOUT_FILENO); + dup2(log_fd, STDERR_FILENO); + close(log_fd); + + // Use unbuffered output for real-time log streaming + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + + // Test that redirection works + printf("[Worker %d] Log file initialized: %s\n", worker_id, log_file.c_str()); + fflush(stdout); + } + + // Create RAFT handle AFTER stdout redirect so CUDA sees the new streams + const char* msg0 = "[Worker] Creating raft::handle_t...\n"; + write(STDOUT_FILENO, msg0, 36); + fsync(STDOUT_FILENO); + + raft::handle_t handle; + + const char* msg01 = "[Worker] Handle created, starting solve...\n"; + write(STDOUT_FILENO, msg01, 44); + fsync(STDOUT_FILENO); + + // Read problem data (pipe mode or shm mode) + std::vector request_data; + bool read_success = false; + + if (config.use_pipes) { + // Pipe mode: read from pipe (blocks until server writes data) + // No need to wait for data_sent flag - pipe read naturally blocks + int read_fd = worker_pipes[worker_id].worker_read_fd; + read_success = recv_job_data_pipe(read_fd, job.data_size, request_data); + if (!read_success) { + std::cerr << "[Worker " << worker_id << "] Failed to read job data from pipe\n"; + } + } else { + // SHM mode: read from shared memory + read_success = read_job_shm(job.shm_data_name, job.data_size, request_data); + if (!read_success) { + std::cerr << "[Worker " << worker_id << "] Failed to 
read job data from shm\n"; + } + // Cleanup job input shm now that we've read it + cleanup_job_shm(job.shm_data_name); + } + + if (!read_success) { + // Store error result + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + strncpy(result_queue[i].job_id, job_id.c_str(), sizeof(result_queue[i].job_id) - 1); + result_queue[i].status = 1; // Error status + result_queue[i].data_size = 0; + result_queue[i].shm_data_name[0] = '\0'; + result_queue[i].worker_index = worker_id; + strncpy(result_queue[i].error_message, + "Failed to read job data", + sizeof(result_queue[i].error_message) - 1); + result_queue[i].retrieved = false; + result_queue[i].ready = true; + break; + } + } + // Clear job slot + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; + continue; + } + + std::vector result_data; + std::string error_message; + bool success = false; + + try { + if (is_mip) { + cuopt::mps_parser::mps_data_model_t mps_data; + mip_solver_settings_t settings; + + if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { + auto solution = solve_mip(&handle, mps_data, settings); + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_mip_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize MIP request"; + } + } else { + cuopt::mps_parser::mps_data_model_t mps_data; + pdlp_solver_settings_t settings; + + if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { + const char* msg1 = "[Worker] Calling solve_lp via write()...\n"; + write(STDOUT_FILENO, msg1, strlen(msg1)); + fsync(STDOUT_FILENO); + auto solution = solve_lp(&handle, mps_data, settings); + const char* msg2 = "[Worker] solve_lp done via write()\n"; + write(STDOUT_FILENO, msg2, strlen(msg2)); + fsync(STDOUT_FILENO); + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_lp_solution(solution); + success = true; + } else 
{ + error_message = "Failed to deserialize LP request"; + } + } + } catch (const std::exception& e) { + error_message = std::string("Exception: ") + e.what(); + } + + // Restore stdout and stderr to console + fflush(stdout); + fflush(stderr); + dup2(saved_stdout, STDOUT_FILENO); + dup2(saved_stderr, STDERR_FILENO); + close(saved_stdout); + close(saved_stderr); + + + // Store result (pipe mode: write to pipe, shm mode: write to shared memory) + if (config.use_pipes) { + // PIPE MODE: Set result_queue metadata FIRST, THEN write to pipe. + // This avoids deadlock: the main thread's result_retrieval_thread + // needs to see ready=true before it will read from the pipe, + // but if we write to pipe first with a large result, we'll block + // waiting for the reader that will never come. + + // Find a free result slot and populate metadata + int result_slot = -1; + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + result_slot = i; + ResultQueueEntry& result = result_queue[i]; + strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); + result.status = success ? 0 : 1; + result.data_size = success ? 
result_data.size() : 0; + result.shm_data_name[0] = '\0'; // Not used in pipe mode + result.worker_index = worker_id; + if (!success) { + strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); + } + result.retrieved = false; + // Set ready=true BEFORE writing to pipe so reader thread starts reading + // This prevents deadlock with large results that exceed pipe buffer size + result.ready = true; + break; + } + } + + // Now write result data to pipe (reader thread should be ready to receive) + if (success && !result_data.empty() && result_slot >= 0) { + int write_fd = worker_pipes[worker_id].worker_write_fd; + bool write_success = send_result_pipe(write_fd, result_data); + if (!write_success) { + std::cerr << "[Worker " << worker_id << "] Failed to write result to pipe\n"; + // Mark as failed in result queue + result_queue[result_slot].status = 1; + strncpy(result_queue[result_slot].error_message, + "Failed to write result to pipe", + sizeof(result_queue[result_slot].error_message) - 1); + } + } + } else { + // SHM mode: store result in shared memory + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + ResultQueueEntry& result = result_queue[i]; + strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); + result.status = success ? 
0 : 1; + result.worker_index = worker_id; + if (success && !result_data.empty()) { + // Create per-result shared memory + std::string shm_name = write_result_shm(job_id, result_data); + if (shm_name.empty()) { + // Failed to create shm - report error + result.status = 1; + result.data_size = 0; + result.shm_data_name[0] = '\0'; + strncpy(result.error_message, + "Failed to create shared memory for result", + sizeof(result.error_message) - 1); + } else { + result.data_size = result_data.size(); + strncpy(result.shm_data_name, shm_name.c_str(), sizeof(result.shm_data_name) - 1); + } + } else if (!success) { + strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); + result.data_size = 0; + result.shm_data_name[0] = '\0'; + } else { + result.data_size = 0; + result.shm_data_name[0] = '\0'; + } + result.retrieved = false; + result.ready = true; // Mark as ready last + break; + } + } + } + + // Clear job slot + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; + job.cancelled = false; + + std::cout << "[Worker " << worker_id << "] Completed job: " << job_id + << " (success: " << success << ")\n"; + } + + shm_ctrl->active_workers--; + std::cout << "[Worker " << worker_id << "] Stopped\n"; + _exit(0); +} + +// Create pipes for a worker (pipe mode only) +bool create_worker_pipes(int worker_id) +{ + if (!config.use_pipes) return true; + + // Ensure worker_pipes has enough slots + while (static_cast(worker_pipes.size()) <= worker_id) { + worker_pipes.push_back({-1, -1, -1, -1}); + } + + WorkerPipes& wp = worker_pipes[worker_id]; + + // Create pipe for server -> worker data + int input_pipe[2]; + if (pipe(input_pipe) < 0) { + std::cerr << "[Server] Failed to create input pipe for worker " << worker_id << "\n"; + return false; + } + wp.worker_read_fd = input_pipe[0]; // Worker reads from this + wp.to_worker_fd = input_pipe[1]; // Server writes to this + + // Create pipe for worker -> 
server results
  int output_pipe[2];
  if (pipe(output_pipe) < 0) {
    std::cerr << "[Server] Failed to create output pipe for worker " << worker_id << "\n";
    // Don't leak the already-created input pipe on partial failure
    close(input_pipe[0]);
    close(input_pipe[1]);
    return false;
  }
  wp.from_worker_fd = output_pipe[0];  // Server reads from this
  wp.worker_write_fd = output_pipe[1]; // Worker writes to this

  return true;
}

// Close server-side pipe ends for a worker (called when restarting)
void close_worker_pipes_server(int worker_id)
{
  if (!config.use_pipes) return;
  if (worker_id < 0 || worker_id >= static_cast(worker_pipes.size())) return;

  WorkerPipes& wp = worker_pipes[worker_id];
  if (wp.to_worker_fd >= 0) {
    close(wp.to_worker_fd);
    wp.to_worker_fd = -1;  // -1 marks the fd as closed/invalid
  }
  if (wp.from_worker_fd >= 0) {
    close(wp.from_worker_fd);
    wp.from_worker_fd = -1;
  }
}

// Close worker-side pipe ends in parent after fork
// (the child keeps its own copies; the parent must drop them so that
// pipe EOF is delivered correctly when the child exits)
void close_worker_pipes_child_ends(int worker_id)
{
  if (!config.use_pipes) return;
  if (worker_id < 0 || worker_id >= static_cast(worker_pipes.size())) return;

  WorkerPipes& wp = worker_pipes[worker_id];
  if (wp.worker_read_fd >= 0) {
    close(wp.worker_read_fd);
    wp.worker_read_fd = -1;
  }
  if (wp.worker_write_fd >= 0) {
    close(wp.worker_write_fd);
    wp.worker_write_fd = -1;
  }
}

// Fork config.num_workers worker processes. In pipe mode each worker gets
// its own pair of pipes created before the fork; a worker whose pipes
// cannot be created is skipped rather than aborting the whole server.
void spawn_workers()
{
  for (int i = 0; i < config.num_workers; ++i) {
    // Create pipes before forking (pipe mode)
    if (config.use_pipes && !create_worker_pipes(i)) {
      std::cerr << "[Server] Failed to create pipes for worker " << i << "\n";
      continue;
    }

    pid_t pid = fork();
    if (pid < 0) {
      std::cerr << "[Server] Failed to fork worker " << i << "\n";
      close_worker_pipes_server(i);
    } else if (pid == 0) {
      // Child process
      if (config.use_pipes) {
        // Close all other workers' pipe fds so this child holds no
        // references to siblings' pipes (otherwise EOF is never seen)
        for (int j = 0; j < static_cast(worker_pipes.size()); ++j) {
          if (j != i) {
            if (worker_pipes[j].worker_read_fd >= 0) close(worker_pipes[j].worker_read_fd);
            if (worker_pipes[j].worker_write_fd >= 0) 
close(worker_pipes[j].worker_write_fd);
            if (worker_pipes[j].to_worker_fd >= 0) close(worker_pipes[j].to_worker_fd);
            if (worker_pipes[j].from_worker_fd >= 0) close(worker_pipes[j].from_worker_fd);
          }
        }
        // Close server ends of our pipes
        close(worker_pipes[i].to_worker_fd);
        close(worker_pipes[i].from_worker_fd);
      }
      worker_process(i);
      _exit(0); // Should not reach here (worker_process calls _exit itself)
    } else {
      // Parent process
      worker_pids.push_back(pid);
      // Close worker ends of pipes (parent doesn't need them)
      close_worker_pipes_child_ends(i);
    }
  }
}

// Block until every spawned worker process has exited, then forget the PIDs.
void wait_for_workers()
{
  for (pid_t pid : worker_pids) {
    int status;
    waitpid(pid, &status, 0);
  }
  worker_pids.clear();
}

// Spawn a single replacement worker and return its PID
// Returns -1 if pipe creation or fork fails. Used by the monitor thread
// to restart a worker that died.
pid_t spawn_single_worker(int worker_id)
{
  // Create new pipes for the replacement worker (pipe mode)
  if (config.use_pipes) {
    // Close old pipes first
    close_worker_pipes_server(worker_id);
    if (!create_worker_pipes(worker_id)) {
      std::cerr << "[Server] Failed to create pipes for replacement worker " << worker_id << "\n";
      return -1;
    }
  }

  pid_t pid = fork();
  if (pid < 0) {
    std::cerr << "[Server] Failed to fork replacement worker " << worker_id << "\n";
    close_worker_pipes_server(worker_id);
    return -1;
  } else if (pid == 0) {
    // Child process — same fd hygiene as spawn_workers()
    if (config.use_pipes) {
      // Close all other workers' pipe fds
      for (int j = 0; j < static_cast(worker_pipes.size()); ++j) {
        if (j != worker_id) {
          if (worker_pipes[j].worker_read_fd >= 0) close(worker_pipes[j].worker_read_fd);
          if (worker_pipes[j].worker_write_fd >= 0) close(worker_pipes[j].worker_write_fd);
          if (worker_pipes[j].to_worker_fd >= 0) close(worker_pipes[j].to_worker_fd);
          if (worker_pipes[j].from_worker_fd >= 0) close(worker_pipes[j].from_worker_fd);
        }
      }
      // Close server ends of our pipes
      close(worker_pipes[worker_id].to_worker_fd);
      close(worker_pipes[worker_id].from_worker_fd);
    }
    worker_process(worker_id);
    _exit(0); // Should 
not reach here
  }

  // Parent: close worker ends of new pipes
  close_worker_pipes_child_ends(worker_id);
  return pid;
}

// Mark jobs being processed by a dead worker as failed (or cancelled if it was cancelled)
// Called from the monitor thread after waitpid() reports the worker gone.
// Cleans up the job's input data, synthesizes a result-queue entry, clears
// the job slot, and updates the job tracker.
void mark_worker_jobs_failed(pid_t dead_worker_pid)
{
  for (size_t i = 0; i < MAX_JOBS; ++i) {
    if (job_queue[i].ready && job_queue[i].claimed && job_queue[i].worker_pid == dead_worker_pid) {
      std::string job_id(job_queue[i].job_id);
      bool was_cancelled = job_queue[i].cancelled;

      if (was_cancelled) {
        std::cerr << "[Server] Worker " << dead_worker_pid
                  << " killed for cancelled job: " << job_id << "\n";
      } else {
        std::cerr << "[Server] Worker " << dead_worker_pid
                  << " died while processing job: " << job_id << "\n";
      }

      // Cleanup job data
      if (config.use_pipes) {
        // Pipe mode: remove from pending data if not yet sent
        std::lock_guard lock(pending_data_mutex);
        pending_job_data.erase(job_id);
      } else {
        // SHM mode: cleanup job input shm (worker may not have done it)
        cleanup_job_shm(job_queue[i].shm_data_name);
      }

      // Store result in result queue (cancelled or failed)
      for (size_t j = 0; j < MAX_RESULTS; ++j) {
        if (!result_queue[j].ready) {
          strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1);
          result_queue[j].status = was_cancelled ? 2 : 1; // 2=cancelled, 1=error
          result_queue[j].data_size = 0;
          result_queue[j].shm_data_name[0] = '\0';
          result_queue[j].worker_index = -1;
          strncpy(result_queue[j].error_message,
                  was_cancelled ? 
"Job was cancelled" : "Worker process died unexpectedly", + sizeof(result_queue[j].error_message) - 1); + result_queue[j].retrieved = false; + result_queue[j].ready = true; + break; + } + } + + // Clear the job slot + job_queue[i].worker_pid = 0; + job_queue[i].worker_index = -1; + job_queue[i].data_sent = false; + job_queue[i].ready = false; + job_queue[i].claimed = false; + job_queue[i].cancelled = false; + + // Update job tracker + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + if (was_cancelled) { + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job was cancelled"; + } else { + it->second.status = JobStatus::FAILED; + it->second.error_message = "Worker process died unexpectedly"; + } + } + } + } + } +} + +// Worker monitor thread - detects dead workers and restarts them +void worker_monitor_thread() +{ + std::cout << "[Server] Worker monitor thread started\n"; + std::cout.flush(); + + while (keep_running) { + // Check all worker PIDs for dead workers + for (size_t i = 0; i < worker_pids.size(); ++i) { + pid_t pid = worker_pids[i]; + if (pid <= 0) continue; + + int status; + pid_t result = waitpid(pid, &status, WNOHANG); + + if (result == pid) { + // Worker has exited + int exit_code = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + bool signaled = WIFSIGNALED(status); + int signal_num = signaled ? 
WTERMSIG(status) : 0; + + if (signaled) { + std::cerr << "[Server] Worker " << pid << " killed by signal " << signal_num << "\n"; + std::cerr.flush(); + } else if (exit_code != 0) { + std::cerr << "[Server] Worker " << pid << " exited with code " << exit_code << "\n"; + std::cerr.flush(); + } else { + // Clean exit during shutdown - don't restart + if (shm_ctrl && shm_ctrl->shutdown_requested) { + worker_pids[i] = 0; + continue; + } + std::cerr << "[Server] Worker " << pid << " exited unexpectedly\n"; + std::cerr.flush(); + } + + // Mark any jobs this worker was processing as failed + mark_worker_jobs_failed(pid); + + // Spawn replacement worker (unless shutting down) + if (keep_running && shm_ctrl && !shm_ctrl->shutdown_requested) { + pid_t new_pid = spawn_single_worker(static_cast(i)); + if (new_pid > 0) { + worker_pids[i] = new_pid; + std::cout << "[Server] Restarted worker " << i << " with PID " << new_pid << "\n"; + std::cout.flush(); + } else { + worker_pids[i] = 0; // Failed to restart + } + } else { + worker_pids[i] = 0; + } + } + } + + // Check every 100ms + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + std::cout << "[Server] Worker monitor thread stopped\n"; + std::cout.flush(); +} + +// ============================================================================ +// Result Retrieval Thread (main process) +// Also handles sending job data to workers in pipe mode +// ============================================================================ + +void result_retrieval_thread() +{ + std::cout << "[Server] Result retrieval thread started\n"; + + while (keep_running) { + bool found = false; + + // PIPE MODE: Check for jobs that need data sent to workers + if (config.use_pipes) { + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && job_queue[i].claimed && !job_queue[i].data_sent && + !job_queue[i].cancelled) { + std::string job_id(job_queue[i].job_id); + int worker_idx = job_queue[i].worker_index; + + if (worker_idx >= 0) 
{ + // Get pending job data + std::vector job_data; + { + std::lock_guard lock(pending_data_mutex); + auto it = pending_job_data.find(job_id); + if (it != pending_job_data.end()) { + job_data = std::move(it->second); + pending_job_data.erase(it); + } + } + + if (!job_data.empty()) { + // Send data to worker's pipe + if (send_job_data_pipe(worker_idx, job_data)) { + job_queue[i].data_sent = true; + if (config.verbose) { + std::cout << "[Server] Sent " << job_data.size() << " bytes to worker " + << worker_idx << " for job " << job_id << "\n"; + } + } else { + std::cerr << "[Server] Failed to send job data to worker " << worker_idx << "\n"; + // Mark job as failed + job_queue[i].cancelled = true; + } + found = true; + } + } + } + } + } + + // Check for completed results + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (result_queue[i].ready && !result_queue[i].retrieved) { + std::string job_id(result_queue[i].job_id); + uint32_t result_status = result_queue[i].status; + bool success = (result_status == 0); + bool cancelled = (result_status == 2); + int worker_idx = result_queue[i].worker_index; + + std::vector result_data; + std::string error_message; + + if (success && result_queue[i].data_size > 0) { + if (config.use_pipes) { + // Pipe mode: read result from worker's output pipe + if (!recv_result_pipe(worker_idx, result_queue[i].data_size, result_data)) { + error_message = "Failed to read result data from pipe"; + success = false; + } + } else { + // SHM mode: read from shared memory + if (!read_job_shm( + result_queue[i].shm_data_name, result_queue[i].data_size, result_data)) { + error_message = "Failed to read result data from shared memory"; + success = false; + } + // Cleanup result shm after reading + cleanup_job_shm(result_queue[i].shm_data_name); + } + } else if (!success) { + error_message = result_queue[i].error_message; + } + + // Check if there's a blocking waiter + { + std::lock_guard lock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if 
(wit != waiting_threads.end()) { + // Wake up the waiting thread + auto waiter = wit->second; + waiter->result_data = std::move(result_data); + waiter->error_message = error_message; + waiter->success = success; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + // Update job tracker + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + if (success) { + it->second.status = JobStatus::COMPLETED; + it->second.result_data = result_data; + } else if (cancelled) { + it->second.status = JobStatus::CANCELLED; + it->second.error_message = error_message; + } else { + it->second.status = JobStatus::FAILED; + it->second.error_message = error_message; + } + } + } + + result_queue[i].retrieved = true; + result_queue[i].worker_index = -1; + result_queue[i].ready = false; // Free slot + found = true; + } + } + + if (!found) { + usleep(10000); // 10ms + } + + result_cv.notify_all(); + } + + std::cout << "[Server] Result retrieval thread stopped\n"; +} + +// ============================================================================ +// Async Request Handlers +// ============================================================================ + +// Create per-job shared memory segment and copy data into it +// Returns the shm name on success, empty string on failure +static std::string create_job_shm(const std::string& job_id, + const std::vector& data, + const char* prefix) +{ + std::string shm_name = std::string("/cuopt_") + prefix + "_" + job_id; + + int fd = shm_open(shm_name.c_str(), O_CREAT | O_RDWR, 0666); + if (fd < 0) { + std::cerr << "[Server] Failed to create shm " << shm_name << ": " << strerror(errno) << "\n"; + return ""; + } + + if (ftruncate(fd, data.size()) < 0) { + std::cerr << "[Server] Failed to size shm " << shm_name << ": " << strerror(errno) << "\n"; + close(fd); + shm_unlink(shm_name.c_str()); + return ""; + } + + void* ptr = mmap(nullptr, data.size(), PROT_READ | PROT_WRITE, MAP_SHARED, 
fd, 0);
  close(fd);

  if (ptr == MAP_FAILED) {
    std::cerr << "[Server] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n";
    shm_unlink(shm_name.c_str());
    return "";
  }

  memcpy(ptr, data.data(), data.size());
  munmap(ptr, data.size());

  return shm_name;
}

// Read data from per-job shared memory segment
// data_size must match the size the segment was created with; the segment
// is NOT unlinked here (callers decide when to cleanup_job_shm).
static bool read_job_shm(const char* shm_name, size_t data_size, std::vector& data)
{
  int fd = shm_open(shm_name, O_RDONLY, 0666);
  if (fd < 0) {
    std::cerr << "[Worker] Failed to open shm " << shm_name << ": " << strerror(errno) << "\n";
    return false;
  }

  void* ptr = mmap(nullptr, data_size, PROT_READ, MAP_SHARED, fd, 0);
  close(fd);

  if (ptr == MAP_FAILED) {
    std::cerr << "[Worker] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n";
    return false;
  }

  data.resize(data_size);
  memcpy(data.data(), ptr, data_size);
  munmap(ptr, data_size);

  return true;
}

// Write data to per-result shared memory segment
// Thin wrapper over create_job_shm with the "result" name prefix.
static std::string write_result_shm(const std::string& job_id, const std::vector& data)
{
  return create_job_shm(job_id, data, "result");
}

// Cleanup per-job shared memory segment
// Empty name means "no segment" (pipe mode) and is a no-op.
static void cleanup_job_shm(const char* shm_name)
{
  if (shm_name[0] != '\0') { shm_unlink(shm_name); }
}

// ============================================================================
// Pipe I/O Helpers
// ============================================================================

// Write all data to a pipe (handles partial writes)
// Retries on EINTR; returns false on any other write error.
static bool write_to_pipe(int fd, const void* data, size_t size)
{
  const uint8_t* ptr = static_cast(data);
  size_t remaining = size;
  while (remaining > 0) {
    ssize_t written = ::write(fd, ptr, remaining);
    if (written <= 0) {
      if (errno == EINTR) continue;
      return false;
    }
    ptr += written;
    remaining -= written;
  }
  return true;
}

// Read all data from a pipe (handles partial reads) with timeout
// timeout_ms: milliseconds to wait for 
data (default 120000 = 2 minutes)
// Each iteration poll()s for readability before read(), so the timeout
// applies per chunk, not to the whole transfer.
static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms)
{
  uint8_t* ptr = static_cast(data);
  size_t remaining = size;
  while (remaining > 0) {
    // Use poll() to wait for data with timeout
    struct pollfd pfd;
    pfd.fd = fd;
    pfd.events = POLLIN;

    int poll_result = poll(&pfd, 1, timeout_ms);
    if (poll_result < 0) {
      if (errno == EINTR) continue;
      std::cerr << "[Server] poll() failed on pipe: " << strerror(errno) << "\n";
      return false;
    }
    if (poll_result == 0) {
      std::cerr << "[Server] Timeout waiting for pipe data (waited " << timeout_ms << "ms)\n";
      return false;
    }
    if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
      std::cerr << "[Server] Pipe error/hangup detected\n";
      return false;
    }

    ssize_t nread = ::read(fd, ptr, remaining);
    if (nread <= 0) {
      // NOTE(review): when nread == 0 (EOF) errno is stale from some earlier
      // call; if it happens to be EINTR this `continue` re-polls on a closed
      // pipe instead of returning. The EINTR check should only apply when
      // nread < 0.
      if (errno == EINTR) continue;
      if (nread == 0) {
        std::cerr << "[Server] Pipe EOF (writer closed)\n";
      }
      return false;
    }
    ptr += nread;
    remaining -= nread;
  }
  return true;
}

// Send job data to worker via pipe (length-prefixed)
// Wire format: uint64_t byte count, then the raw bytes.
static bool send_job_data_pipe(int worker_idx, const std::vector& data)
{
  if (worker_idx < 0 || worker_idx >= static_cast(worker_pipes.size())) { return false; }
  int fd = worker_pipes[worker_idx].to_worker_fd;
  if (fd < 0) return false;

  // Send size first
  uint64_t size = data.size();
  if (!write_to_pipe(fd, &size, sizeof(size))) return false;
  // Send data
  if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false;
  return true;
}

// Receive job data from pipe (length-prefixed) - called by worker
// expected_size comes from the job queue entry and is cross-checked against
// the length prefix as a framing sanity check.
static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data)
{
  // Read size
  uint64_t size;
  if (!read_from_pipe(fd, &size, sizeof(size))) return false;
  if (size != expected_size) {
    std::cerr << "[Worker] Size mismatch: expected " << expected_size << ", got " << size << "\n";
    return false;
  }
  // Read data
  data.resize(size);
  
if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false;
  return true;
}

// Send result data to server via pipe (length-prefixed) - called by worker
// Same wire format as send_job_data_pipe: uint64_t count, then the bytes.
static bool send_result_pipe(int fd, const std::vector& data)
{
  // Send size first
  uint64_t size = data.size();
  if (!write_to_pipe(fd, &size, sizeof(size))) return false;
  // Send data
  if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false;
  return true;
}

// Receive result data from worker via pipe (length-prefixed)
// expected_size comes from the result queue entry set by the worker.
static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector& data)
{
  if (worker_idx < 0 || worker_idx >= static_cast(worker_pipes.size())) { return false; }
  int fd = worker_pipes[worker_idx].from_worker_fd;
  if (fd < 0) return false;

  // Read size
  uint64_t size;
  if (!read_from_pipe(fd, &size, sizeof(size))) return false;
  if (size != expected_size) {
    std::cerr << "[Server] Result size mismatch: expected " << expected_size << ", got " << size
              << "\n";
    return false;
  }
  // Read data
  data.resize(size);
  if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false;
  return true;
}

// Submit a job asynchronously (returns job_id)
// Returns {true, job_id} on success, {false, error_message} on failure.
// Pipe mode parks the payload in pending_job_data (sent later when a worker
// claims the job); shm mode copies it into a per-job shm segment up front.
std::pair submit_job_async(const std::vector& request_data, bool is_mip)
{
  std::string job_id = generate_job_id();

  std::string shm_name;
  if (config.use_pipes) {
    // Pipe mode: store data in pending map (will be sent when worker claims job)
    {
      std::lock_guard lock(pending_data_mutex);
      pending_job_data[job_id] = request_data;
    }
  } else {
    // SHM mode: create per-job shared memory for problem data
    shm_name = create_job_shm(job_id, request_data, "job");
    if (shm_name.empty()) { return {false, "Failed to create shared memory for job data"}; }
  }

  // Find free job slot
  for (size_t i = 0; i < MAX_JOBS; ++i) {
    if (!job_queue[i].ready && !job_queue[i].claimed) {
      // NOTE(review): strncpy with size-1 leaves the last byte untouched;
      // this relies on the slot's job_id buffer last byte already being NUL.
      strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1);
      
job_queue[i].problem_type = is_mip ? 1 : 0;
      job_queue[i].data_size = request_data.size();
      if (!config.use_pipes) {
        strncpy(
          job_queue[i].shm_data_name, shm_name.c_str(), sizeof(job_queue[i].shm_data_name) - 1);
      } else {
        job_queue[i].shm_data_name[0] = '\0';
      }
      job_queue[i].worker_pid = 0;
      job_queue[i].worker_index = -1;
      job_queue[i].data_sent = false;
      job_queue[i].claimed = false;
      job_queue[i].cancelled = false;
      // Set ready last so workers scanning the queue never see a
      // half-initialized entry
      job_queue[i].ready = true; // Mark as ready last

      // Track job
      {
        std::lock_guard lock(tracker_mutex);
        JobInfo info;
        info.job_id = job_id;
        info.status = JobStatus::QUEUED;
        info.submit_time = std::chrono::steady_clock::now();
        info.is_mip = is_mip;
        info.is_blocking = false;
        job_tracker[job_id] = info;
      }

      if (config.verbose) { std::cout << "[Server] Job submitted (async): " << job_id << "\n"; }

      return {true, job_id};
    }
  }

  // No free slot - cleanup whatever storage was already allocated
  if (config.use_pipes) {
    std::lock_guard lock(pending_data_mutex);
    pending_job_data.erase(job_id);
  } else {
    shm_unlink(shm_name.c_str());
  }
  return {false, "Job queue full"};
}

// Check job status
// Fills `message` with a human-readable description and returns the status.
// Lazily promotes QUEUED -> PROCESSING by scanning the job queue for a
// claimed entry with this job_id.
JobStatus check_job_status(const std::string& job_id, std::string& message)
{
  std::lock_guard lock(tracker_mutex);
  auto it = job_tracker.find(job_id);

  if (it == job_tracker.end()) {
    message = "Job ID not found";
    return JobStatus::NOT_FOUND;
  }

  // If status is QUEUED, check if the job has been claimed by a worker
  // (which means it's now PROCESSING)
  if (it->second.status == JobStatus::QUEUED) {
    for (size_t i = 0; i < MAX_JOBS; ++i) {
      if (job_queue[i].ready && job_queue[i].claimed &&
          std::string(job_queue[i].job_id) == job_id) {
        it->second.status = JobStatus::PROCESSING;
        break;
      }
    }
  }

  switch (it->second.status) {
    case JobStatus::QUEUED: message = "Job is queued"; break;
    case JobStatus::PROCESSING: message = "Job is being processed"; break;
    case JobStatus::COMPLETED: message = "Job completed"; break;
    
case JobStatus::FAILED: message = "Job failed: " + it->second.error_message; break;
    case JobStatus::CANCELLED: message = "Job was cancelled"; break;
    default: message = "Unknown status";
  }

  return it->second.status;
}

// Check if a job is MIP (vs LP)
// Unknown job ids are reported as LP (false) rather than an error.
bool get_job_is_mip(const std::string& job_id)
{
  std::lock_guard lock(tracker_mutex);
  auto it = job_tracker.find(job_id);
  if (it == job_tracker.end()) {
    return false; // Default to LP if not found
  }
  return it->second.is_mip;
}

// Get job result
// Non-blocking: returns true with the serialized result only when the job
// is COMPLETED; otherwise returns false with an explanatory error_message
// (not found / failed / still running).
bool get_job_result(const std::string& job_id,
                    std::vector& result_data,
                    std::string& error_message)
{
  std::lock_guard lock(tracker_mutex);
  auto it = job_tracker.find(job_id);

  if (it == job_tracker.end()) {
    error_message = "Job ID not found";
    return false;
  }

  if (it->second.status == JobStatus::COMPLETED) {
    result_data = it->second.result_data;
    return true;
  } else if (it->second.status == JobStatus::FAILED) {
    error_message = it->second.error_message;
    return false;
  } else {
    error_message = "Job not completed yet";
    return false;
  }
}

// Wait for job to complete (blocking)
// This uses condition variables - the thread will sleep until the job is done
// The result retrieval thread signals the waiter when it processes this
// job's result-queue entry.
bool wait_for_result(const std::string& job_id,
                     std::vector& result_data,
                     std::string& error_message)
{
  // First check if job already completed
  {
    std::lock_guard lock(tracker_mutex);
    auto it = job_tracker.find(job_id);

    if (it == job_tracker.end()) {
      error_message = "Job ID not found";
      return false;
    }

    // If already in terminal state, return immediately
    if (it->second.status == JobStatus::COMPLETED) {
      result_data = it->second.result_data;
      return true;
    } else if (it->second.status == JobStatus::FAILED) {
      error_message = it->second.error_message;
      return false;
    } else if (it->second.status == JobStatus::CANCELLED) {
      error_message = "Job was cancelled";
      return false;
    }
  }

  // Job is still running - create a waiter 
and wait on condition variable
  auto waiter = std::make_shared();

  {
    std::lock_guard lock(waiters_mutex);
    waiting_threads[job_id] = waiter;
  }

  if (config.verbose) {
    std::cout << "[Server] WAIT_FOR_RESULT: waiting for job " << job_id << "\n";
  }

  // Wait on the condition variable - this thread will sleep until signaled
  // (the predicate guards against spurious wakeups)
  {
    std::unique_lock lock(waiter->mutex);
    waiter->cv.wait(lock, [&waiter] { return waiter->ready; });
  }

  // Remove from waiting_threads
  {
    std::lock_guard lock(waiters_mutex);
    waiting_threads.erase(job_id);
  }

  if (config.verbose) {
    std::cout << "[Server] WAIT_FOR_RESULT: job " << job_id
              << " completed, success=" << waiter->success << "\n";
  }

  if (waiter->success) {
    result_data = std::move(waiter->result_data);
    return true;
  } else {
    error_message = waiter->error_message;
    return false;
  }
}

// ============================================================================
// Log File Management
// ============================================================================

// Directory for per-job log files
const std::string LOG_DIR = "/tmp/cuopt_logs";

// Get the log file path for a given job_id
std::string get_log_file_path(const std::string& job_id) { return LOG_DIR + "/log_" + job_id; }

// Ensure log directory exists
// Creates LOG_DIR with mode 0755 if stat() says it is missing.
void ensure_log_dir_exists()
{
  struct stat st;
  if (stat(LOG_DIR.c_str(), &st) != 0) { mkdir(LOG_DIR.c_str(), 0755); }
}

// Delete log file for a job
void delete_log_file(const std::string& job_id)
{
  std::string log_file = get_log_file_path(job_id);
  unlink(log_file.c_str()); // Ignore errors if file doesn't exist
}

// Delete job
// Removes the tracker entry (and the job's log file); returns false if the
// job_id is unknown.
bool delete_job(const std::string& job_id)
{
  std::lock_guard lock(tracker_mutex);
  auto it = job_tracker.find(job_id);

  if (it == job_tracker.end()) { return false; }

  job_tracker.erase(it);

  // Also delete the log file
  delete_log_file(job_id);

  if (config.verbose) { std::cout << "[Server] Job deleted: " 
<< job_id << "\n"; } + + return true; +} + +// Cancel job - returns: 0=success, 1=job_not_found, 2=already_completed, 3=already_cancelled +// Also returns the job's status after cancel attempt via job_status_out +int cancel_job(const std::string& job_id, JobStatus& job_status_out, std::string& message) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { + message = "Job ID not found"; + job_status_out = JobStatus::NOT_FOUND; + return 1; + } + + JobStatus current_status = it->second.status; + + // Can't cancel completed jobs + if (current_status == JobStatus::COMPLETED) { + message = "Cannot cancel completed job"; + job_status_out = JobStatus::COMPLETED; + return 2; + } + + // Already cancelled + if (current_status == JobStatus::CANCELLED) { + message = "Job already cancelled"; + job_status_out = JobStatus::CANCELLED; + return 3; + } + + // Can't cancel failed jobs + if (current_status == JobStatus::FAILED) { + message = "Cannot cancel failed job"; + job_status_out = JobStatus::FAILED; + return 2; + } + + // Find the job in the shared memory queue + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && strcmp(job_queue[i].job_id, job_id.c_str()) == 0) { + // Check if job is being processed by a worker + pid_t worker_pid = job_queue[i].worker_pid; + + if (worker_pid > 0 && job_queue[i].claimed) { + // Job is being processed - kill the worker + if (config.verbose) { + std::cout << "[Server] Cancelling running job " << job_id << " (killing worker " + << worker_pid << ")\n"; + } + kill(worker_pid, SIGKILL); + // The worker monitor thread will detect the dead worker, restart it, + // and mark_worker_jobs_failed will be called. But we want CANCELLED not FAILED. + // So we mark it as cancelled here first. 
+ job_queue[i].cancelled = true; + } else { + // Job is queued but not yet claimed - mark as cancelled + if (config.verbose) { std::cout << "[Server] Cancelling queued job " << job_id << "\n"; } + job_queue[i].cancelled = true; + } + + // Update job tracker + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job cancelled by user"; + job_status_out = JobStatus::CANCELLED; + message = "Job cancelled successfully"; + + // Delete the log file for this job + delete_log_file(job_id); + + // Wake up any threads waiting for this job + { + std::lock_guard wlock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + auto waiter = wit->second; + waiter->error_message = "Job cancelled by user"; + waiter->success = false; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + return 0; + } + } + + // Job not found in queue (might have already finished processing) + // Re-check status since we hold the lock + if (it->second.status == JobStatus::COMPLETED) { + message = "Cannot cancel completed job"; + job_status_out = JobStatus::COMPLETED; + return 2; + } + + // Job must be in flight or in an edge case - mark as cancelled anyway + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job cancelled by user"; + job_status_out = JobStatus::CANCELLED; + message = "Job cancelled"; + + // Wake up any threads waiting for this job + { + std::lock_guard wlock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + auto waiter = wit->second; + waiter->error_message = "Job cancelled by user"; + waiter->success = false; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + return 0; +} + +// ============================================================================ +// Sync Mode Handler (with log streaming) +// ============================================================================ + +/** + * @brief Handle synchronous (blocking) solve 
requests directly. + * + * For sync mode, we solve directly in the main thread instead of using worker + * processes. This allows stdout log streaming to work correctly since the + * stdout_streamer_t captures output from the same process. + */ +void handle_sync_solve(int client_fd, + const std::vector& request_data, + bool is_mip, + bool stream_logs) +{ + std::string job_id = generate_job_id(); + + if (config.verbose) { + std::cout << "[Server] Sync solve request, job_id: " << job_id + << " (streaming: " << (stream_logs ? "yes" : "no") << ")\n"; + } + + auto serializer = get_serializer(); + + // Create RAFT handle for GPU operations + raft::handle_t handle; + + std::vector result_data; + std::string error_message; + bool success = false; + + // Use RAII stdout streamer - captures stdout and streams to client while + // also echoing to server console. Destructor restores original stdout. + { + stdout_streamer_t streamer(client_fd, stream_logs); + + try { + if (is_mip) { + cuopt::mps_parser::mps_data_model_t mps_data; + mip_solver_settings_t settings; + + if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { + auto solution = solve_mip(&handle, mps_data, settings); + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_mip_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize MIP request"; + } + } else { + cuopt::mps_parser::mps_data_model_t mps_data; + pdlp_solver_settings_t settings; + + if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { + auto solution = solve_lp(&handle, mps_data, settings); + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_lp_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize LP request"; + } + } + } catch (const std::exception& e) { + error_message = std::string("Exception: ") + e.what(); + } + } // streamer destructor restores stdout + + if (config.verbose) { + 
std::cout << "[Server] Sync solve completed: " << job_id << " (success: " << success << ")\n"; + } + + // Send result to client + if (success) { + std::cout << "[Server] Sending solution message, size = " << result_data.size() << " bytes\n"; + send_solution_message(client_fd, result_data); + } else { + std::cerr << "[Server] Sync solve failed: " << error_message << "\n"; + // Send empty solution to indicate failure + std::vector empty; + send_solution_message(client_fd, empty); + } + + close(client_fd); +} + +// ============================================================================ +// Client Connection Handler +// ============================================================================ + +void handle_client(int client_fd, bool stream_logs) +{ + auto serializer = get_serializer(); + + // Receive request + std::vector request_data; + if (!receive_request(client_fd, request_data)) { + std::cerr << "[Server] Failed to receive request\n"; + close(client_fd); + return; + } + + if (config.verbose) { + std::cout << "[Server] Received request, size: " << request_data.size() << " bytes\n"; + } + + // Determine if this is an async protocol request + bool is_async_request = serializer->is_async_request(request_data); + + if (is_async_request) { + // Parse async request type and handle accordingly + auto request_type = serializer->get_async_request_type(request_data); + + if (request_type == 0) { // SUBMIT_JOB + bool blocking = serializer->is_blocking_request(request_data); + bool is_mip = serializer->is_mip_request(request_data); + + // Extract the actual problem data from the async request + std::vector problem_data = serializer->extract_problem_data(request_data); + + // UNIFIED ARCHITECTURE: All jobs go through the queue + // Submit job to queue (same for both sync and async) + auto [submit_ok, job_id_or_error] = submit_job_async(problem_data, is_mip); + + if (!submit_ok) { + // Submission failed + auto response = serializer->serialize_submit_response(false, 
job_id_or_error); + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (blocking) { + // BLOCKING MODE: Wait for result using condition variable (no polling) + // This unifies sync/async - job goes through queue but we wait here + std::string job_id = job_id_or_error; + + if (config.verbose) { + std::cout << "[Server] Blocking request, job_id: " << job_id + << " (waiting for completion)\n"; + } + + std::vector result_data; + std::string error_message; + + // Block on condition variable until job completes + bool success = wait_for_result(job_id, result_data, error_message); + + // NOTE: We do NOT auto-delete here. The client should call DELETE_RESULT + // after consuming all logs. This allows the pattern: + // 1. Submit job (blocking=true or async + WAIT_FOR_RESULT) + // 2. Retrieve logs (GET_LOGS) - can be done in parallel thread + // 3. Delete job (DELETE_RESULT) when done with logs + + // Return result response (same format as GET_RESULT) + bool job_is_mip = is_mip; // Use the is_mip from the submit request + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] Blocking request completed: " << job_id << ", success=" << success + << "\n"; + } + } else { + // ASYNC MODE: Return job_id immediately + auto response = serializer->serialize_submit_response(true, job_id_or_error); + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } + } else if (request_type == 1) { // CHECK_STATUS + std::string job_id = serializer->get_job_id(request_data); + std::string message; + JobStatus status = check_job_status(job_id, message); + + int status_code = 0; + 
switch (status) { + case JobStatus::QUEUED: status_code = 0; break; + case JobStatus::PROCESSING: status_code = 1; break; + case JobStatus::COMPLETED: status_code = 2; break; + case JobStatus::FAILED: status_code = 3; break; + case JobStatus::NOT_FOUND: status_code = 4; break; + case JobStatus::CANCELLED: status_code = 5; break; + } + + auto response = serializer->serialize_status_response(status_code, message); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 2) { // GET_RESULT + std::string job_id = serializer->get_job_id(request_data); + std::vector result_data; + std::string error_message; + + bool success = get_job_result(job_id, result_data, error_message); + bool job_is_mip = get_job_is_mip(job_id); + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 3) { // DELETE_RESULT + std::string job_id = serializer->get_job_id(request_data); + bool success = delete_job(job_id); + + auto response = serializer->serialize_delete_response(success); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 4) { // GET_LOGS + std::string job_id = serializer->get_job_id(request_data); + int64_t frombyte = serializer->get_frombyte(request_data); + + std::vector log_lines; + int64_t nbytes = 0; + bool job_exists = false; + + // Read logs from file + std::string log_file = get_log_file_path(job_id); + std::ifstream ifs(log_file); + if (ifs.is_open()) { + job_exists = true; + ifs.seekg(frombyte); + std::string line; + while (std::getline(ifs, line)) { + log_lines.push_back(line); + } + nbytes = ifs.tellg(); + if (nbytes < 0) { + // 
tellg returns -1 at EOF, get actual file size + ifs.clear(); + ifs.seekg(0, std::ios::end); + nbytes = ifs.tellg(); + } + ifs.close(); + } else { + // Check if job exists but log file doesn't (not started yet) + std::lock_guard lock(tracker_mutex); + job_exists = (job_tracker.find(job_id) != job_tracker.end()); + } + + auto response = serializer->serialize_logs_response(job_id, log_lines, nbytes, job_exists); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] GET_LOGS: job=" << job_id << ", frombyte=" << frombyte + << ", lines=" << log_lines.size() << ", nbytes=" << nbytes << "\n"; + } + } else if (request_type == 5) { // CANCEL_JOB + std::string job_id = serializer->get_job_id(request_data); + + JobStatus job_status_out; + std::string message; + int result = cancel_job(job_id, job_status_out, message); + + // Convert JobStatus to status code + int status_code = 0; + switch (job_status_out) { + case JobStatus::QUEUED: status_code = 0; break; + case JobStatus::PROCESSING: status_code = 1; break; + case JobStatus::COMPLETED: status_code = 2; break; + case JobStatus::FAILED: status_code = 3; break; + case JobStatus::NOT_FOUND: status_code = 4; break; + case JobStatus::CANCELLED: status_code = 5; break; + } + + bool success = (result == 0); + auto response = serializer->serialize_cancel_response(success, message, status_code); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] CANCEL_JOB: job=" << job_id << ", success=" << success + << ", message=" << message << "\n"; + } + } else if (request_type == 6) { // WAIT_FOR_RESULT + std::string job_id = serializer->get_job_id(request_data); + + if (config.verbose) { + std::cout << "[Server] WAIT_FOR_RESULT: job=" << job_id << " (blocking until 
complete)\n"; + } + + std::vector result_data; + std::string error_message; + + // This will block until the job completes (uses condition variable, no polling) + bool success = wait_for_result(job_id, result_data, error_message); + + // Send result response (same format as GET_RESULT) + bool job_is_mip = get_job_is_mip(job_id); + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); + + uint64_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] WAIT_FOR_RESULT: job=" << job_id << " completed, success=" << success + << "\n"; + } + } + + close(client_fd); + } else { + // Legacy/simple request format - treat as sync LP/MIP request + bool is_mip = serializer->is_mip_request(request_data); + handle_sync_solve(client_fd, request_data, is_mip, stream_logs); + } +} + +// ============================================================================ +// Main +// ============================================================================ + +void print_usage(const char* prog) +{ + std::cout << "Usage: " << prog << " [options]\n" + << "Options:\n" + << " -p PORT Port to listen on (default: 9090)\n" + << " -w NUM Number of worker processes (default: 1)\n" + << " -q Quiet mode (less verbose output)\n" + << " --no-stream Disable real-time log streaming to clients\n" + << " --use-shm Use POSIX shared memory for IPC (default: pipes)\n" + << " Pipes are container-friendly; shm may be faster but\n" + << " requires /dev/shm with sufficient size\n" + << " -h Show this help\n" + << "\n" + << "Environment Variables (client-side):\n" + << " CUOPT_REMOTE_USE_SYNC=1 Force sync mode (default is async)\n"; +} + +int main(int argc, char** argv) +{ + // Parse arguments + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) { + config.port = std::stoi(argv[++i]); + } else if 
(strcmp(argv[i], "-w") == 0 && i + 1 < argc) { + config.num_workers = std::stoi(argv[++i]); + } else if (strcmp(argv[i], "-q") == 0) { + config.verbose = false; + } else if (strcmp(argv[i], "--no-stream") == 0) { + config.stream_logs = false; + } else if (strcmp(argv[i], "--use-shm") == 0) { + config.use_pipes = false; // Use shared memory instead of pipes + } else if (strcmp(argv[i], "-h") == 0) { + print_usage(argv[0]); + return 0; + } + } + + // Set up signal handlers + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + signal(SIGPIPE, SIG_IGN); // Ignore SIGPIPE (broken pipe) - happens when writing to closed pipes + + // IMPORTANT: Clear remote solve environment variables to prevent infinite recursion + unsetenv("CUOPT_REMOTE_HOST"); + unsetenv("CUOPT_REMOTE_PORT"); + + // Ensure log directory exists for per-job log files + ensure_log_dir_exists(); + + std::cout << "=== cuOpt Remote Solve Server (Async) ===\n"; + std::cout << "Port: " << config.port << "\n"; + std::cout << "Workers: " << config.num_workers << " (processes)\n"; + std::cout << "Log streaming: " << (config.stream_logs ? "enabled" : "disabled") << "\n"; + std::cout << "IPC mode: " << (config.use_pipes ? 
"pipes (container-friendly)" : "shared memory") + << "\n"; + std::cout << "\n"; + std::cout << "Async API:\n"; + std::cout << " SUBMIT_JOB - Submit a job, get job_id\n"; + std::cout << " CHECK_STATUS - Check job status\n"; + std::cout << " GET_RESULT - Retrieve completed result\n"; + std::cout << " DELETE_RESULT - Delete job from server\n"; + std::cout << " GET_LOGS - Retrieve log output\n"; + std::cout << " CANCEL_JOB - Cancel a queued or running job\n"; + std::cout << " WAIT_FOR_RESULT - Block until job completes (no polling)\n"; + std::cout << "\n"; + + // Initialize shared memory + if (!init_shared_memory()) { + std::cerr << "[Server] Failed to initialize shared memory\n"; + return 1; + } + + // Spawn worker processes + spawn_workers(); + + // Start result retrieval thread + std::thread result_thread(result_retrieval_thread); + + // Start worker monitor thread (detects dead workers and restarts them) + std::thread monitor_thread(worker_monitor_thread); + + // Create server socket + int server_fd = socket(AF_INET, SOCK_STREAM, 0); + if (server_fd < 0) { + std::cerr << "[Server] Failed to create socket\n"; + cleanup_shared_memory(); + return 1; + } + + int opt = 1; + setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + addr.sin_port = htons(config.port); + + if (bind(server_fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + std::cerr << "[Server] Failed to bind to port " << config.port << "\n"; + close(server_fd); + cleanup_shared_memory(); + return 1; + } + + if (listen(server_fd, 10) < 0) { + std::cerr << "[Server] Failed to listen\n"; + close(server_fd); + cleanup_shared_memory(); + return 1; + } + + std::cout << "[Server] Listening on port " << config.port << "\n"; + + // Flush stdout before accept loop + std::cout.flush(); + + // Accept connections + while (keep_running) { + struct sockaddr_in client_addr; + 
socklen_t client_len = sizeof(client_addr); + + fd_set read_fds; + FD_ZERO(&read_fds); + FD_SET(server_fd, &read_fds); + + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + + int ready = select(server_fd + 1, &read_fds, nullptr, nullptr, &tv); + if (ready < 0) { + if (errno == EINTR) continue; + std::cerr << "[Server] Select error\n"; + break; + } + if (ready == 0) continue; + + int client_fd = accept(server_fd, (struct sockaddr*)&client_addr, &client_len); + if (client_fd < 0) { + if (errno == EINTR) continue; + std::cerr << "[Server] Accept error\n"; + continue; + } + + if (config.verbose) { + char client_ip[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &client_addr.sin_addr, client_ip, INET_ADDRSTRLEN); + std::cout << "[Server] Connection from " << client_ip << "\n"; + } + + // Handle client in separate thread + std::thread([client_fd]() { handle_client(client_fd, config.stream_logs); }).detach(); + } + + // Shutdown + std::cout << "[Server] Shutting down...\n"; + close(server_fd); + + // Signal workers to stop + if (shm_ctrl) { shm_ctrl->shutdown_requested = true; } + + // Wait for result retrieval thread + result_cv.notify_all(); + if (result_thread.joinable()) { result_thread.join(); } + + // Wait for worker monitor thread + if (monitor_thread.joinable()) { monitor_thread.join(); } + + // Wait for workers + wait_for_workers(); + + // Cleanup + cleanup_shared_memory(); + + std::cout << "[Server] Stopped\n"; + return 0; +} diff --git a/cpp/include/cuopt/linear_programming/data_model_view.hpp b/cpp/include/cuopt/linear_programming/data_model_view.hpp new file mode 100644 index 000000000..413987227 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/data_model_view.hpp @@ -0,0 +1,58 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +/** + * @file data_model_view.hpp + * @brief Provides data_model_view_t in the cuopt::linear_programming namespace. + * + * This header provides access to the data_model_view_t class, a non-owning view + * over LP/MIP problem data. The view uses span to hold pointers that can + * reference either host or device memory, making it suitable for both local + * GPU-based solves and remote CPU-based solves. + * + * The canonical implementation lives in cuopt::mps_parser for historical reasons + * and to maintain mps_parser as a standalone library. This header provides + * convenient aliases in the cuopt::linear_programming namespace. + */ + +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Non-owning span type that can point to either host or device memory. + * + * This is an alias to the span type defined in mps_parser. The span holds + * a pointer and size, but does not own the underlying memory. + * + * @tparam T Element type + */ +template +using span = cuopt::mps_parser::span; + +/** + * @brief Non-owning view of LP/MIP problem data. + * + * This is an alias to the data_model_view_t defined in mps_parser. + * The view stores problem data (constraint matrix, bounds, objective, etc.) + * as span members, which can point to either host or device memory. 
+ * + * Key features for remote solve support: + * - Non-owning: does not allocate or free memory + * - Memory-agnostic: spans can point to host OR device memory + * - Serializable: host data can be directly serialized for remote solve + * + * @tparam i_t Integer type for indices (typically int) + * @tparam f_t Floating point type for values (typically float or double) + */ +template +using data_model_view_t = cuopt::mps_parser::data_model_view_t; + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index 6ff8d324b..4d896cf7d 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -51,10 +52,38 @@ class mip_solution_t : public base_solution_t { rmm::cuda_stream_view stream_view); mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view); + // CPU-only constructors for remote solve + mip_solution_t(std::vector solution, + std::vector var_names, + f_t objective, + f_t mip_gap, + mip_termination_status_t termination_status, + f_t max_constraint_violation, + f_t max_int_violation, + f_t max_variable_bound_violation, + solver_stats_t stats); + + mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats); + mip_solution_t(const cuopt::logic_error& error_status); + bool is_mip() const override { return true; } + + /** + * @brief Check if solution data is stored in device (GPU) memory + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + const rmm::device_uvector& get_solution() const; rmm::device_uvector& get_solution(); + /** + * @brief Returns the solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. 
+ */ + std::vector& get_solution_host(); + const std::vector& get_solution_host() const; + f_t get_objective_value() const; f_t get_mip_gap() const; f_t get_solution_bound() const; @@ -75,8 +104,105 @@ class mip_solution_t : public base_solution_t { void write_to_sol_file(std::string_view filename, rmm::cuda_stream_view stream_view) const; void log_summary() const; + //============================================================================ + // Setters for remote solve deserialization + //============================================================================ + + /** + * @brief Set the solution in host memory + * @param solution The solution vector + */ + void set_solution_host(std::vector solution); + + /** + * @brief Set the objective value + */ + void set_objective(f_t value); + + /** + * @brief Set the MIP gap + */ + void set_mip_gap(f_t value); + + /** + * @brief Set the solution bound + */ + void set_solution_bound(f_t value); + + /** + * @brief Set total solve time + */ + void set_total_solve_time(double value); + + /** + * @brief Set presolve time + */ + void set_presolve_time(double value); + + /** + * @brief Set max constraint violation + */ + void set_max_constraint_violation(f_t value); + + /** + * @brief Set max integer violation + */ + void set_max_int_violation(f_t value); + + /** + * @brief Set max variable bound violation + */ + void set_max_variable_bound_violation(f_t value); + + /** + * @brief Set number of nodes + */ + void set_nodes(i_t value); + + /** + * @brief Set number of simplex iterations + */ + void set_simplex_iterations(i_t value); + + /** + * @brief Get error string + */ + std::string get_error_string() const; + + /** + * @brief Get number of nodes + */ + i_t get_nodes() const; + + /** + * @brief Get number of simplex iterations + */ + i_t get_simplex_iterations() const; + + /** + * @brief Copy solution data from GPU to CPU memory. 
+ * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_solution_host(). + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. + * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: - rmm::device_uvector solution_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> solution_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + std::vector var_names_; f_t objective_; f_t mip_gap_; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index c5fe96ef1..9d7e34831 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -168,6 +169,42 @@ class optimization_problem_solution_t : public base_solution_t { const raft::handle_t* handler_ptr, bool deep_copy); + /** + * @brief Construct an optimization problem solution with CPU (host) memory storage. + * Used for remote solve scenarios where no GPU is available. 
+ * + * @param[in] primal_solution The primal solution in host memory + * @param[in] dual_solution The dual solution in host memory + * @param[in] reduced_cost The reduced cost in host memory + * @param[in] objective_name The objective name + * @param[in] var_names The variables names + * @param[in] row_names The rows name + * @param[in] termination_stats The termination statistics + * @param[in] termination_status The termination reason + */ + optimization_problem_solution_t(std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status); + + /** + * @brief Construct an empty solution for CPU-only scenarios (e.g., remote solve error) + * + * @param[in] termination_status Reason for termination + */ + optimization_problem_solution_t(pdlp_termination_status_t termination_status); + + /** + * @brief Construct an error solution for CPU-only scenarios + * + * @param[in] error_status The error object + */ + optimization_problem_solution_t(cuopt::logic_error error_status); + /** * @brief Set the solve time in seconds * @@ -234,6 +271,40 @@ class optimization_problem_solution_t : public base_solution_t { */ rmm::device_uvector& get_reduced_cost(); + /** + * @brief Check if solution data is stored in device (GPU) memory + * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + + /** + * @brief Returns the primal solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the primal solution. + */ + std::vector& get_primal_solution_host(); + const std::vector& get_primal_solution_host() const; + + /** + * @brief Returns the dual solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. 
+ * + * @return std::vector& The host memory container for the dual solution. + */ + std::vector& get_dual_solution_host(); + const std::vector& get_dual_solution_host() const; + + /** + * @brief Returns the reduced cost in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the reduced cost. + */ + std::vector& get_reduced_cost_host(); + const std::vector& get_reduced_cost_host() const; + /** * @brief Get termination reason * @return Termination reason @@ -255,6 +326,128 @@ class optimization_problem_solution_t : public base_solution_t { pdlp_warm_start_data_t& get_pdlp_warm_start_data(); + //============================================================================ + // Setters for host solution data (used by remote solve deserialization) + //============================================================================ + + /** + * @brief Set the primal solution in host memory + * @param solution The primal solution vector + */ + void set_primal_solution_host(std::vector solution); + + /** + * @brief Set the dual solution in host memory + * @param solution The dual solution vector + */ + void set_dual_solution_host(std::vector solution); + + /** + * @brief Set the reduced cost in host memory + * @param reduced_cost The reduced cost vector + */ + void set_reduced_cost_host(std::vector reduced_cost); + + /** + * @brief Set the termination statistics + * @param stats The termination statistics + */ + void set_termination_stats(const additional_termination_information_t& stats); + + //============================================================================ + // Getters for termination statistics + //============================================================================ + + /** + * @brief Get the L2 primal residual + * @return L2 primal residual + */ + f_t get_l2_primal_residual() const; + + /** + * @brief Get the L2 dual residual + * @return L2 dual residual + */ + f_t 
get_l2_dual_residual() const; + + /** + * @brief Get the primal objective value + * @return Primal objective + */ + f_t get_primal_objective() const; + + /** + * @brief Get the dual objective value + * @return Dual objective + */ + f_t get_dual_objective() const; + + /** + * @brief Get the duality gap + * @return Gap + */ + f_t get_gap() const; + + /** + * @brief Get number of iterations + * @return Number of iterations + */ + i_t get_nb_iterations() const; + + /** + * @brief Check if solved by PDLP + * @return true if solved by PDLP + */ + bool get_solved_by_pdlp() const; + + /** + * @brief Set L2 primal residual + * @param value The value + */ + void set_l2_primal_residual(f_t value); + + /** + * @brief Set L2 dual residual + * @param value The value + */ + void set_l2_dual_residual(f_t value); + + /** + * @brief Set primal objective + * @param value The value + */ + void set_primal_objective(f_t value); + + /** + * @brief Set dual objective + * @param value The value + */ + void set_dual_objective(f_t value); + + /** + * @brief Set gap + * @param value The value + */ + void set_gap(f_t value); + + /** + * @brief Set number of iterations + * @param value The value + */ + void set_nb_iterations(i_t value); + + /** + * @brief Set solved by PDLP flag + * @param value The value + */ + void set_solved_by_pdlp(bool value); + + /** + * @brief Get error string + * @return Error message string + */ + std::string get_error_string() const; + /** * @brief Writes the solver_solution object as a JSON object to the 'filename' file using * 'stream_view' to transfer the data from device to host before it is written to the file. @@ -282,12 +475,39 @@ class optimization_problem_solution_t : public base_solution_t { void copy_from(const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other); + /** + * @brief Copy solution data from GPU to CPU memory. 
+ * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_primal_solution_host(), etc. + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. + * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: void write_additional_termination_statistics_to_file(std::ofstream& myfile); - rmm::device_uvector primal_solution_; - rmm::device_uvector dual_solution_; - rmm::device_uvector reduced_cost_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> primal_solution_; + std::unique_ptr> dual_solution_; + std::unique_ptr> reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> primal_solution_host_; + std::unique_ptr> dual_solution_host_; + std::unique_ptr> reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + + // Flag indicating if solved by PDLP (vs dual simplex) + bool solved_by_pdlp_ = true; + pdlp_warm_start_data_t pdlp_warm_start_data_; pdlp_termination_status_t termination_status_; diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 364fee30a..e06c69a03 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -107,4 +109,74 @@ optimization_problem_t mps_data_model_to_optimization_problem( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model); +/** + * @brief Convert a data_model_view_t to an optimization_problem_t. 
+ * + * This function copies data from the view (which points to GPU memory) + * into an owning optimization_problem_t. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t object with spans pointing to GPU memory + * @return optimization_problem_t owning container for the problem + */ +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view); + +/** + * @brief Linear programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). + * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @note Both primal and dual solutions are zero-initialized. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A pdlp_solver_settings_t object with the settings for the PDLP + * solver. + * @param[in] problem_checking If true, the problem is checked for consistency. + * @param[in] use_pdlp_solver_mode If true, the PDLP hyperparameters coming from the + * pdlp_solver_mode are used. 
+ * @return optimization_problem_solution_t owning container for the solver solution + */ +template +optimization_problem_solution_t solve_lp( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, + bool problem_checking = true, + bool use_pdlp_solver_mode = true); + +/** + * @brief Mixed integer programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). + * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A mip_solver_settings_t object with the settings for the MIP + * solver. + * @return mip_solution_t owning container for the solver solution + */ +template +mip_solution_t solve_mip( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + mip_solver_settings_t const& settings = mip_solver_settings_t{}); + } // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index e1a75747d..abe49a2be 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,9 +25,19 @@ namespace cython { // aggregate for call_solve() return type // to be exposed to cython: struct linear_programming_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr primal_solution_; std::unique_ptr dual_solution_; std::unique_ptr reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::vector primal_solution_host_; + std::vector dual_solution_host_; + std::vector reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + /* -- PDLP Warm Start Data -- */ std::unique_ptr current_primal_solution_; std::unique_ptr current_dual_solution_; @@ -64,8 +74,15 @@ struct linear_programming_ret_t { }; struct mip_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr solution_; + // CPU (host) storage - populated for remote solves + std::vector solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + linear_programming::mip_termination_status_t termination_status_; error_type_t error_status_; std::string error_message_; diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp new file mode 100644 index 000000000..9e0b53cd8 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -0,0 +1,464 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Abstract interface for serializing/deserializing cuOpt problems and solutions. 
+ * + * This interface allows users to provide custom serialization implementations + * for different wire formats (protobuf, JSON, msgpack, custom binary, etc.). + * + * The default implementation uses Protocol Buffers and is built into libcuopt. + * Users can provide their own implementation by: + * 1. Implementing this interface + * 2. Compiling to a shared library + * 3. Setting CUOPT_SERIALIZER_LIB environment variable to the library path + * + * @tparam i_t Index type (int32_t or int64_t) + * @tparam f_t Float type (float or double) + */ +template +class remote_serializer_t { + public: + virtual ~remote_serializer_t() = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + /** + * @brief Serialize an LP problem with settings to a byte buffer. + * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Serialize a MIP problem with settings to a byte buffer. + * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) = 0; + + //============================================================================ + // Solution Deserialization + //============================================================================ + + /** + * @brief Deserialize an LP solution from a byte buffer. 
+ * + * @param data The serialized solution bytes received from the server + * @return The deserialized LP solution object + */ + virtual optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) = 0; + + /** + * @brief Deserialize a MIP solution from a byte buffer. + * + * @param data The serialized solution bytes received from the server + * @return The deserialized MIP solution object + */ + virtual mip_solution_t deserialize_mip_solution(const std::vector& data) = 0; + + //============================================================================ + // Server-side: Request Deserialization & Response Serialization + //============================================================================ + + /** + * @brief Check if serialized data is an LP or MIP request. + * + * @param data The serialized request bytes + * @return true if MIP request, false if LP request + */ + virtual bool is_mip_request(const std::vector& data) = 0; + + /** + * @brief Deserialize an LP request (problem + settings) from bytes. + * + * This is used by the server to receive problems from clients. + * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_lp_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Deserialize a MIP request (problem + settings) from bytes. 
+ * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_mip_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + mip_solver_settings_t& settings) = 0; + + /** + * @brief Serialize an LP solution to bytes for sending to client. + * + * @param solution The LP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) = 0; + + /** + * @brief Serialize a MIP solution to bytes for sending to client. + * + * @param solution The MIP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_mip_solution(const mip_solution_t& solution) = 0; + + //============================================================================ + // Async Operations + //============================================================================ + + /** + * @brief Serialize a job submission request with async options. + * + * @param view Problem data + * @param settings LP solver settings + * @param blocking If true, server should wait and return result synchronously + * @return Serialized async request bytes + */ + virtual std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) = 0; + + /** + * @brief Serialize a job submission request with async options. 
+ * + * @param view Problem data + * @param settings MIP solver settings + * @param blocking If true, server should wait and return result synchronously + * @return Serialized async request bytes + */ + virtual std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) = 0; + + /** + * @brief Serialize a status check request. + * + * @param job_id The job ID to check + * @return Serialized request bytes + */ + virtual std::vector serialize_status_request(const std::string& job_id) = 0; + + /** + * @brief Serialize a get result request. + * + * @param job_id The job ID to get results for + * @return Serialized request bytes + */ + virtual std::vector serialize_get_result_request(const std::string& job_id) = 0; + + /** + * @brief Serialize a delete request. + * + * @param job_id The job ID to delete + * @return Serialized request bytes + */ + virtual std::vector serialize_delete_request(const std::string& job_id) = 0; + + /** + * @brief Serialize a get logs request. + * + * @param job_id The job ID to get logs for + * @param frombyte Byte offset to start reading from (0 = beginning) + * @return Serialized request bytes + */ + virtual std::vector serialize_get_logs_request(const std::string& job_id, + int64_t frombyte = 0) = 0; + + /** + * @brief Serialize a cancel job request. + * + * @param job_id The job ID to cancel + * @return Serialized request bytes + */ + virtual std::vector serialize_cancel_request(const std::string& job_id) = 0; + + /** + * @brief Job status enumeration. + */ + enum class job_status_t { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND, CANCELLED }; + + /** + * @brief Structure to hold log retrieval results. 
+ */ + struct logs_result_t { + std::vector log_lines; ///< Log lines read from file + int64_t nbytes; ///< Ending byte position (use as frombyte next time) + bool job_exists; ///< False if job_id not found + }; + + /** + * @brief Structure to hold cancel response results. + */ + struct cancel_result_t { + bool success; ///< True if cancel request was processed + std::string message; ///< Success/error message + job_status_t job_status; ///< Status of job after cancel attempt + }; + + /** + * @brief Deserialize job submission response. + * + * @param data Response bytes + * @param[out] job_id Job ID assigned by server (on success) + * @param[out] error_message Error message (on failure) + * @return true if submission succeeded + */ + virtual bool deserialize_submit_response(const std::vector& data, + std::string& job_id, + std::string& error_message) = 0; + + /** + * @brief Deserialize status check response. + * + * @param data Response bytes + * @return Job status + */ + virtual job_status_t deserialize_status_response(const std::vector& data) = 0; + + /** + * @brief Deserialize result response as LP solution. + * + * @param data Response bytes + * @return LP solution, or error solution if failed + */ + virtual optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) = 0; + + /** + * @brief Deserialize result response as MIP solution. + * + * @param data Response bytes + * @return MIP solution, or error solution if failed + */ + virtual mip_solution_t deserialize_mip_result_response( + const std::vector& data) = 0; + + /** + * @brief Deserialize logs response. + * + * @param data Response bytes + * @return Logs result structure + */ + virtual logs_result_t deserialize_logs_response(const std::vector& data) = 0; + + /** + * @brief Deserialize cancel response. 
+ * + * @param data Response bytes + * @return Cancel result structure + */ + virtual cancel_result_t deserialize_cancel_response(const std::vector& data) = 0; + + //============================================================================ + // Server-side Async Request Handling + //============================================================================ + + /** + * @brief Check if serialized data is an async protocol request. + * + * Async requests contain RequestType field (SUBMIT_JOB, CHECK_STATUS, etc.) + * + * @param data The serialized request bytes + * @return true if this is an async protocol request + */ + virtual bool is_async_request(const std::vector& data) = 0; + + /** + * @brief Get the async request type. + * + * @param data The serialized request bytes + * @return Request type: 0=SUBMIT_JOB, 1=CHECK_STATUS, 2=GET_RESULT, 3=DELETE_RESULT, + * 4=GET_LOGS, 5=CANCEL_JOB, 6=WAIT_FOR_RESULT + */ + virtual int get_async_request_type(const std::vector& data) = 0; + + /** + * @brief Check if async request has blocking flag set. + * + * @param data The serialized request bytes + * @return true if blocking mode is requested + */ + virtual bool is_blocking_request(const std::vector& data) = 0; + + /** + * @brief Extract problem data from an async SUBMIT_JOB request. + * + * @param data The serialized async request bytes + * @return The extracted problem data (LP or MIP request) + */ + virtual std::vector extract_problem_data(const std::vector& data) = 0; + + /** + * @brief Get job_id from async request (for CHECK_STATUS, GET_RESULT, DELETE_RESULT, GET_LOGS). + * + * @param data The serialized request bytes + * @return The job ID string + */ + virtual std::string get_job_id(const std::vector& data) = 0; + + /** + * @brief Get frombyte from GET_LOGS request. 
+ * + * @param data The serialized request bytes + * @return The byte offset to start reading from + */ + virtual int64_t get_frombyte(const std::vector& data) = 0; + + /** + * @brief Serialize a job submission response. + * + * @param success Whether submission succeeded + * @param result On success: job_id, on failure: error message + * @return Serialized response bytes + */ + virtual std::vector serialize_submit_response(bool success, + const std::string& result) = 0; + + /** + * @brief Serialize a status check response. + * + * @param status_code Job status: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND + * @param message Status message + * @return Serialized response bytes + */ + virtual std::vector serialize_status_response(int status_code, + const std::string& message) = 0; + + /** + * @brief Serialize a get result response. + * + * @param success Whether result retrieval succeeded + * @param result_data The solution data (if success) + * @param error_message Error message (if failure) + * @param is_mip Whether this is a MIP solution (vs LP) + * @return Serialized response bytes + */ + virtual std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message, + bool is_mip = false) = 0; + + /** + * @brief Serialize a delete response. + * + * @param success Whether deletion succeeded + * @return Serialized response bytes + */ + virtual std::vector serialize_delete_response(bool success) = 0; + + /** + * @brief Serialize a logs response. + * + * @param job_id The job ID + * @param log_lines Log lines read from file + * @param nbytes Ending byte position in log file + * @param job_exists False if job_id not found + * @return Serialized response bytes + */ + virtual std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t nbytes, + bool job_exists) = 0; + + /** + * @brief Serialize a cancel response. 
+ * + * @param success Whether cancel was successful + * @param message Success/error message + * @param status_code Job status after cancel: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, + * 4=NOT_FOUND, 5=CANCELLED + * @return Serialized response bytes + */ + virtual std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) = 0; + + //============================================================================ + // Metadata + //============================================================================ + + /** + * @brief Get the serialization format name (for logging/debugging). + * + * @return Format name string (e.g., "protobuf", "json", "msgpack") + */ + virtual std::string format_name() const = 0; + + /** + * @brief Get version of the serialization protocol. + * + * @return Protocol version number + */ + virtual uint32_t protocol_version() const = 0; +}; + +/** + * @brief Factory function type for creating serializer instances. + * + * Custom serializer libraries must export a function with this signature + * named "create_cuopt_serializer". + */ +template +using serializer_factory_t = std::unique_ptr> (*)(); + +/** + * @brief Get the default (protobuf) serializer instance. + * + * @return Shared pointer to the default serializer + */ +template +std::shared_ptr> get_default_serializer(); + +/** + * @brief Get the currently configured serializer. + * + * Returns the custom serializer if CUOPT_SERIALIZER_LIB is set, + * otherwise returns the default protobuf serializer. 
+ * + * @return Shared pointer to the serializer + */ +template +std::shared_ptr> get_serializer(); + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp new file mode 100644 index 000000000..898a1cba5 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -0,0 +1,130 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Configuration for remote solve connection + */ +struct remote_solve_config_t { + std::string host; + int port; +}; + +/** + * @brief Check if remote solve is enabled via environment variables. + * + * Remote solve is enabled when both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT + * environment variables are set. + * + * @return std::optional containing the remote config if + * remote solve is enabled, std::nullopt otherwise + */ +inline std::optional get_remote_solve_config() +{ + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + + if (host != nullptr && port != nullptr && host[0] != '\0' && port[0] != '\0') { + try { + int port_num = std::stoi(port); + return remote_solve_config_t{std::string(host), port_num}; + } catch (...) { + // Invalid port number, fall back to local solve + return std::nullopt; + } + } + return std::nullopt; +} + +/** + * @brief Check if remote solve is enabled. + * + * @return true if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are both set + */ +inline bool is_remote_solve_enabled() { return get_remote_solve_config().has_value(); } + +/** + * @brief Solve an LP problem on a remote server. 
+ * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +/** + * @brief Solve a MIP problem on a remote server. + * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); + +/** + * @brief Job status enumeration for remote jobs. + */ +enum class remote_job_status_t { + QUEUED, ///< Job is waiting in queue + PROCESSING, ///< Job is being processed by a worker + COMPLETED, ///< Job completed successfully + FAILED, ///< Job failed with an error + NOT_FOUND, ///< Job ID not found on server + CANCELLED ///< Job was cancelled +}; + +/** + * @brief Result of a cancel job request. + */ +struct cancel_job_result_t { + bool success; ///< True if cancellation was successful + std::string message; ///< Success/error message + remote_job_status_t job_status; ///< Status of job after cancel attempt +}; + +/** + * @brief Cancel a job on a remote server. + * + * This function can cancel jobs that are queued (waiting for a worker) or + * currently running. For running jobs, the worker process is killed and + * automatically restarted by the server. 
+ * + * @param config Remote server configuration + * @param job_id The job ID to cancel + * @return Result containing success status, message, and job status + */ +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id); + +} // namespace cuopt::linear_programming diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 9c96cc911..dc2d271ea 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -64,7 +64,8 @@ if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() -add_library(mps_parser SHARED +# Source files for mps_parser +set(MPS_PARSER_SOURCES src/data_model_view.cpp src/mps_data_model.cpp src/mps_parser.cpp @@ -74,6 +75,12 @@ add_library(mps_parser SHARED src/utilities/cython_mps_parser.cpp ) +# Shared library for standalone use +add_library(mps_parser SHARED ${MPS_PARSER_SOURCES}) + +# Static library for linking into libcuopt +add_library(mps_parser_static STATIC ${MPS_PARSER_SOURCES}) + set_target_properties(mps_parser PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" @@ -84,10 +91,21 @@ set_target_properties(mps_parser INTERFACE_POSITION_INDEPENDENT_CODE ON ) +set_target_properties(mps_parser_static + PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON +) + target_compile_options(mps_parser PRIVATE "$<$:${MPS_PARSER_CXX_FLAGS}>" ) +target_compile_options(mps_parser_static + PRIVATE "$<$:${MPS_PARSER_CXX_FLAGS}>" +) + if(WRITE_FATBIN) file(WRITE "${MPS_PARSER_BINARY_DIR}/fatbin.ld" [=[ @@ -101,6 +119,7 @@ if(WRITE_FATBIN) endif() add_library(cuopt::mps_parser ALIAS mps_parser) 
+add_library(cuopt::mps_parser_static ALIAS mps_parser_static) # ################################################################################################## # - include paths --------------------------------------------------------------------------------- @@ -113,6 +132,15 @@ target_include_directories(mps_parser "$" ) +target_include_directories(mps_parser_static + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" +) + if(MPS_PARSER_WITH_BZIP2) target_include_directories(mps_parser PRIVATE BZip2::BZip2) endif(MPS_PARSER_WITH_BZIP2) diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index eb34682ce..3f340ae96 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -406,8 +406,24 @@ class data_model_view_t { */ bool has_quadratic_objective() const noexcept; + /** + * @brief Set whether the data pointed to by this view is in device (GPU) memory. + * @note Default is false (CPU memory). Set to true when view points to GPU buffers. + * + * @param is_device true if data is in GPU memory, false if in CPU memory + */ + void set_is_device_memory(bool is_device) noexcept { is_device_memory_ = is_device; } + + /** + * @brief Check if the data pointed to by this view is in device (GPU) memory. 
+ * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const noexcept { return is_device_memory_; } + private: bool maximize_{false}; + bool is_device_memory_{false}; // true if spans point to GPU memory, false for CPU span A_; span A_indices_; span A_offsets_; diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index 7db2b390c..8be1b899a 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,8 @@ #include #include +#include + namespace cuopt::mps_parser { template @@ -348,7 +350,8 @@ bool data_model_view_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class data_model_view_t; - template class data_model_view_t; +template class data_model_view_t; +template class data_model_view_t; } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/mps_data_model.cpp b/cpp/libmps_parser/src/mps_data_model.cpp index 7d0d44a03..605d5cef6 100644 --- a/cpp/libmps_parser/src/mps_data_model.cpp +++ b/cpp/libmps_parser/src/mps_data_model.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::mps_parser { @@ -462,8 +463,9 @@ bool mps_data_model_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class mps_data_model_t; - template class mps_data_model_t; +template class mps_data_model_t; +template class mps_data_model_t; // TODO current raft to cusparse wrappers only support int64_t // can be CUSPARSE_INDEX_16U, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_64I diff --git a/cpp/scripts/run_dev_server.sh b/cpp/scripts/run_dev_server.sh new file mode 100755 index 000000000..cc6f51d90 --- /dev/null +++ b/cpp/scripts/run_dev_server.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Development script to run the cuopt remote server with the correct library path +# This is only needed during development when the build directory's libcuopt.so +# needs to take precedence over the conda-installed version. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BUILD_DIR="${SCRIPT_DIR}/../build" + +if [ ! -f "${BUILD_DIR}/cuopt_remote_server" ]; then + echo "Error: cuopt_remote_server not found. Please build with -DBUILD_REMOTE_SERVER=ON" + exit 1 +fi + +export LD_LIBRARY_PATH="${BUILD_DIR}:${LD_LIBRARY_PATH}" + +echo "Starting cuopt remote server..." +echo "Build directory: ${BUILD_DIR}" +echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" +echo "---" + +exec "${BUILD_DIR}/cuopt_remote_server" "$@" diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt index c3f673e03..c57f61930 100644 --- a/cpp/src/linear_programming/CMakeLists.txt +++ b/cpp/src/linear_programming/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -8,6 +8,8 @@ set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/protobuf_serializer.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/remote_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdhg.cu diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index 0772dd14b..dcf354914 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -1,15 +1,17 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #include +#include #include #include #include +#include #include #include @@ -17,36 +19,251 @@ #include +#include + #include #include +#include #include +#include using namespace cuopt::mps_parser; using namespace cuopt::linear_programming; +/** + * @brief CPU-side storage for problem data. + * + * This struct stores all problem data in CPU memory. At solve time, a data_model_view_t + * is created pointing to this data, and the solve_lp/solve_mip routines handle + * local vs remote solve automatically. 
+ */ +struct problem_cpu_data_t { + // Problem dimensions + cuopt_int_t num_constraints = 0; + cuopt_int_t num_variables = 0; + + // Objective + bool maximize = false; + cuopt_float_t objective_offset = 0.0; + std::vector objective_coefficients; + + // Quadratic objective (optional) + std::vector Q_values; + std::vector Q_indices; + std::vector Q_offsets; + + // Constraint matrix (CSR format) + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + + // Constraint bounds (two representations) + std::vector row_types; // '<', '>', '=' style + std::vector constraint_bounds; // single RHS for row_types style + std::vector constraint_lower_bounds; // ranged style + std::vector constraint_upper_bounds; // ranged style + bool uses_ranged_constraints = false; + + // Variable bounds + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + + // Variable types + std::vector variable_types; // 'C' for continuous, 'I' for integer + + /** + * @brief Create a data_model_view_t pointing to this CPU data. 
+ */ + cuopt::linear_programming::data_model_view_t create_view() const + { + cuopt::linear_programming::data_model_view_t view; + + view.set_maximize(maximize); + view.set_objective_offset(objective_offset); + + if (!objective_coefficients.empty()) { + view.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + + if (!Q_values.empty()) { + view.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + } + + if (!A_values.empty()) { + view.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + + if (uses_ranged_constraints) { + if (!constraint_lower_bounds.empty()) { + view.set_constraint_lower_bounds(constraint_lower_bounds.data(), + constraint_lower_bounds.size()); + } + if (!constraint_upper_bounds.empty()) { + view.set_constraint_upper_bounds(constraint_upper_bounds.data(), + constraint_upper_bounds.size()); + } + } else { + if (!row_types.empty()) { view.set_row_types(row_types.data(), row_types.size()); } + if (!constraint_bounds.empty()) { + view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + } + + if (!variable_lower_bounds.empty()) { + view.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + + if (!variable_upper_bounds.empty()) { + view.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + + if (!variable_types.empty()) { + view.set_variable_types(variable_types.data(), variable_types.size()); + } + + return view; + } + + /** + * @brief Check if this is a MIP (has integer variables). 
+ */ + bool is_mip() const + { + for (char vt : variable_types) { + if (vt == CUOPT_INTEGER) { return true; } + } + return false; + } +}; + struct problem_and_stream_view_t { - problem_and_stream_view_t() - : op_problem(nullptr), stream_view(rmm::cuda_stream_per_thread), handle(stream_view) + problem_and_stream_view_t() : cpu_data(nullptr), gpu_problem(nullptr), handle(nullptr) {} + + /** + * @brief Ensure CUDA resources are initialized (lazy initialization). + * Only call this when local solve is needed. + */ + void ensure_cuda_initialized() + { + if (!handle) { handle = std::make_unique(); } + } + + raft::handle_t* get_handle_ptr() { + ensure_cuda_initialized(); + return handle.get(); + } + + /** + * @brief Check if this is a MIP problem. + */ + bool is_mip() const + { + if (view.is_device_memory()) { + // GPU path: check gpu_problem's problem category + if (!gpu_problem) return false; + auto cat = gpu_problem->get_problem_category(); + return (cat == problem_category_t::MIP) || (cat == problem_category_t::IP); + } else { + // CPU path: check variable types in cpu_data + if (!cpu_data) return false; + return cpu_data->is_mip(); + } + } + + // Only ONE of these is allocated (optimized memory usage): + std::unique_ptr cpu_data; // for remote solve (CPU memory) + std::unique_ptr> + gpu_problem; // for local solve (GPU memory) + + // Non-owning view pointing to whichever storage is active + // Use view.is_device_memory() to check if data is on GPU or CPU + cuopt::linear_programming::data_model_view_t view; + + // Lazy-initialized CUDA handle (only created for local solve) + std::unique_ptr handle; + + /** + * @brief Create a view pointing to GPU data from the gpu_problem. + * Call this after gpu_problem is fully populated. 
+ */ + void create_view_from_gpu_problem() + { + if (!gpu_problem) return; + auto& gpu = *gpu_problem; + + view.set_maximize(gpu.get_sense()); + view.set_objective_offset(gpu.get_objective_offset()); + view.set_objective_coefficients(gpu.get_objective_coefficients().data(), gpu.get_n_variables()); + view.set_csr_constraint_matrix(gpu.get_constraint_matrix_values().data(), + gpu.get_constraint_matrix_values().size(), + gpu.get_constraint_matrix_indices().data(), + gpu.get_constraint_matrix_indices().size(), + gpu.get_constraint_matrix_offsets().data(), + gpu.get_constraint_matrix_offsets().size()); + + if (!gpu.get_constraint_lower_bounds().is_empty()) { + view.set_constraint_lower_bounds(gpu.get_constraint_lower_bounds().data(), + gpu.get_n_constraints()); + view.set_constraint_upper_bounds(gpu.get_constraint_upper_bounds().data(), + gpu.get_n_constraints()); + } else if (!gpu.get_row_types().is_empty()) { + view.set_row_types(gpu.get_row_types().data(), gpu.get_n_constraints()); + view.set_constraint_bounds(gpu.get_constraint_bounds().data(), gpu.get_n_constraints()); + } + + view.set_variable_lower_bounds(gpu.get_variable_lower_bounds().data(), gpu.get_n_variables()); + view.set_variable_upper_bounds(gpu.get_variable_upper_bounds().data(), gpu.get_n_variables()); + + // Note: variable_types in optimization_problem_t uses var_t enum, not char + // The view's variable_types span will point to GPU memory with var_t values + // This is handled specially in solve routines + + if (gpu.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(gpu.get_quadratic_objective_values().data(), + gpu.get_quadratic_objective_values().size(), + gpu.get_quadratic_objective_indices().data(), + gpu.get_quadratic_objective_indices().size(), + gpu.get_quadratic_objective_offsets().data(), + gpu.get_quadratic_objective_offsets().size()); + } + + view.set_is_device_memory(true); + } + + /** + * @brief Create a view pointing to CPU data from cpu_data. 
+ * Call this after cpu_data is fully populated. + */ + void create_view_from_cpu_data() + { + if (!cpu_data) return; + view = cpu_data->create_view(); + view.set_is_device_memory(false); } - raft::handle_t* get_handle_ptr() { return &handle; } - cuopt::linear_programming::optimization_problem_t* op_problem; - rmm::cuda_stream_view stream_view; - raft::handle_t handle; }; struct solution_and_stream_view_t { - solution_and_stream_view_t(bool solution_for_mip, rmm::cuda_stream_view stream_view) - : is_mip(solution_for_mip), - mip_solution_ptr(nullptr), - lp_solution_ptr(nullptr), - stream_view(stream_view) + solution_and_stream_view_t(bool solution_for_mip, raft::handle_t* handle_ptr = nullptr) + : is_mip(solution_for_mip), mip_solution_ptr(nullptr), lp_solution_ptr(nullptr) { + // Store stream only if we have a handle (local solve) + if (handle_ptr) { stream_view = handle_ptr->get_stream(); } } bool is_mip; mip_solution_t* mip_solution_ptr; optimization_problem_solution_t* lp_solution_ptr; - rmm::cuda_stream_view stream_view; + std::optional stream_view; // Only present for local solve }; int8_t cuOptGetFloatSize() { return sizeof(cuopt_float_t); } @@ -77,6 +294,7 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro parse_mps(filename_str, input_mps_strict)); } catch (const std::exception& e) { CUOPT_LOG_INFO("Error parsing MPS file: %s", e.what()); + delete problem_and_stream; *problem_ptr = nullptr; if (std::string(e.what()).find("Error opening MPS file") != std::string::npos) { return CUOPT_MPS_FILE_ERROR; @@ -84,11 +302,64 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro return CUOPT_MPS_PARSE_ERROR; } } - optimization_problem_t* op_problem = - new optimization_problem_t(mps_data_model_to_optimization_problem( - problem_and_stream->get_handle_ptr(), *mps_data_model_ptr)); - problem_and_stream->op_problem = op_problem; - *problem_ptr = static_cast(problem_and_stream); + + // Check remote solve 
configuration at creation time + bool is_remote = is_remote_solve_enabled(); + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + const auto& mps = *mps_data_model_ptr; + + cpu_data.num_constraints = + static_cast(mps.get_constraint_matrix_offsets().size() - 1); + cpu_data.num_variables = static_cast(mps.get_objective_coefficients().size()); + cpu_data.maximize = mps.get_sense(); + cpu_data.objective_offset = mps.get_objective_offset(); + + cpu_data.objective_coefficients = mps.get_objective_coefficients(); + cpu_data.A_values = mps.get_constraint_matrix_values(); + cpu_data.A_indices = mps.get_constraint_matrix_indices(); + cpu_data.A_offsets = mps.get_constraint_matrix_offsets(); + + if (!mps.get_constraint_lower_bounds().empty() || !mps.get_constraint_upper_bounds().empty()) { + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds = mps.get_constraint_lower_bounds(); + cpu_data.constraint_upper_bounds = mps.get_constraint_upper_bounds(); + } else { + cpu_data.uses_ranged_constraints = false; + cpu_data.constraint_bounds = mps.get_constraint_bounds(); + const auto& mps_row_types = mps.get_row_types(); + cpu_data.row_types.resize(mps_row_types.size()); + for (size_t i = 0; i < mps_row_types.size(); ++i) { + cpu_data.row_types[i] = mps_row_types[i]; + } + } + + cpu_data.variable_lower_bounds = mps.get_variable_lower_bounds(); + cpu_data.variable_upper_bounds = mps.get_variable_upper_bounds(); + + const auto& mps_var_types = mps.get_variable_types(); + cpu_data.variable_types.resize(mps_var_types.size()); + for (size_t i = 0; i < mps_var_types.size(); ++i) { + cpu_data.variable_types[i] = + (mps_var_types[i] == 'I' || mps_var_types[i] == 'B') ? 
CUOPT_INTEGER : CUOPT_CONTINUOUS; + } + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory using existing mps_data_model_to_optimization_problem + problem_and_stream->gpu_problem = + std::make_unique>( + mps_data_model_to_optimization_problem(problem_and_stream->get_handle_ptr(), + *mps_data_model_ptr)); + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); + } + + *problem_ptr = static_cast(problem_and_stream); return CUOPT_SUCCESS; } @@ -118,32 +389,76 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, 
num_constraints); + gpu_problem.set_constraint_bounds(rhs, num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Convert variable types to enum + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -175,34 +490,79 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, 
- num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + 
problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -239,34 +599,83 @@ cuopt_int_t cuOptCreateQuadraticProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - 
problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + 
cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, num_constraints); + gpu_problem.set_constraint_bounds(rhs, num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Create view pointing to GPU data + 
problem_and_stream->create_view_from_gpu_problem(); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -304,36 +713,87 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, - num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = 
std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); 
+ gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -343,7 +803,9 @@ void cuOptDestroyProblem(cuOptOptimizationProblem* problem_ptr) { if (problem_ptr == nullptr) { return; } if (*problem_ptr == nullptr) { return; } - delete static_cast(*problem_ptr); + problem_and_stream_view_t* problem_and_stream = + static_cast(*problem_ptr); + delete problem_and_stream; *problem_ptr = nullptr; } @@ -354,7 +816,11 @@ cuopt_int_t cuOptGetNumConstraints(cuOptOptimizationProblem problem, if (num_constraints_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_constraints_ptr = problem_and_stream_view->op_problem->get_n_constraints(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_constraints_ptr = problem_and_stream_view->cpu_data->num_constraints; + } else { + *num_constraints_ptr 
= problem_and_stream_view->gpu_problem->get_n_constraints(); + } return CUOPT_SUCCESS; } @@ -364,7 +830,11 @@ cuopt_int_t cuOptGetNumVariables(cuOptOptimizationProblem problem, cuopt_int_t* if (num_variables_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_variables_ptr = problem_and_stream_view->op_problem->get_n_variables(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_variables_ptr = problem_and_stream_view->cpu_data->num_variables; + } else { + *num_variables_ptr = problem_and_stream_view->gpu_problem->get_n_variables(); + } return CUOPT_SUCCESS; } @@ -375,8 +845,13 @@ cuopt_int_t cuOptGetObjectiveSense(cuOptOptimizationProblem problem, if (objective_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_sense_ptr = - problem_and_stream_view->op_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + if (!problem_and_stream_view->view.is_device_memory()) { + *objective_sense_ptr = + problem_and_stream_view->cpu_data->maximize ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } else { + *objective_sense_ptr = + problem_and_stream_view->gpu_problem->get_sense() ? 
CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } return CUOPT_SUCCESS; } @@ -387,7 +862,11 @@ cuopt_int_t cuOptGetObjectiveOffset(cuOptOptimizationProblem problem, if (objective_offset_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_offset_ptr = problem_and_stream_view->op_problem->get_objective_offset(); + if (!problem_and_stream_view->view.is_device_memory()) { + *objective_offset_ptr = problem_and_stream_view->cpu_data->objective_offset; + } else { + *objective_offset_ptr = problem_and_stream_view->gpu_problem->get_objective_offset(); + } return CUOPT_SUCCESS; } @@ -398,13 +877,17 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem, if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& objective_coefficients = - problem_and_stream_view->op_problem->get_objective_coefficients(); - raft::copy(objective_coefficients_ptr, - objective_coefficients.data(), - objective_coefficients.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& coeffs = problem_and_stream_view->cpu_data->objective_coefficients; + std::copy(coeffs.begin(), coeffs.end(), objective_coefficients_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(objective_coefficients_ptr, + gpu_problem.get_objective_coefficients().data(), + gpu_problem.get_n_variables(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -415,7 +898,13 @@ cuopt_int_t cuOptGetNumNonZeros(cuOptOptimizationProblem problem, if (num_non_zero_elements_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = 
static_cast(problem); - *num_non_zero_elements_ptr = problem_and_stream_view->op_problem->get_nnz(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_non_zero_elements_ptr = + static_cast(problem_and_stream_view->cpu_data->A_values.size()); + } else { + *num_non_zero_elements_ptr = static_cast( + problem_and_stream_view->gpu_problem->get_constraint_matrix_values().size()); + } return CUOPT_SUCCESS; } @@ -430,25 +919,32 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem, if (constraint_matrix_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_matrix_coefficients = - problem_and_stream_view->op_problem->get_constraint_matrix_values(); - const rmm::device_uvector& constraint_matrix_column_indices = - problem_and_stream_view->op_problem->get_constraint_matrix_indices(); - const rmm::device_uvector& constraint_matrix_row_offsets = - problem_and_stream_view->op_problem->get_constraint_matrix_offsets(); - raft::copy(constraint_matrix_coefficients_ptr, - constraint_matrix_coefficients.data(), - constraint_matrix_coefficients.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_column_indices_ptr, - constraint_matrix_column_indices.data(), - constraint_matrix_column_indices.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_row_offsets_ptr, - constraint_matrix_row_offsets.data(), - constraint_matrix_row_offsets.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& cpu_data = *problem_and_stream_view->cpu_data; + std::copy( + cpu_data.A_values.begin(), cpu_data.A_values.end(), constraint_matrix_coefficients_ptr); + std::copy( + cpu_data.A_indices.begin(), cpu_data.A_indices.end(), constraint_matrix_column_indices_ptr); + std::copy( + 
cpu_data.A_offsets.begin(), cpu_data.A_offsets.end(), constraint_matrix_row_offsets_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto stream = gpu_problem.get_handle_ptr()->get_stream(); + raft::copy(constraint_matrix_coefficients_ptr, + gpu_problem.get_constraint_matrix_values().data(), + gpu_problem.get_constraint_matrix_values().size(), + stream); + raft::copy(constraint_matrix_column_indices_ptr, + gpu_problem.get_constraint_matrix_indices().data(), + gpu_problem.get_constraint_matrix_indices().size(), + stream); + raft::copy(constraint_matrix_row_offsets_ptr, + gpu_problem.get_constraint_matrix_offsets().data(), + gpu_problem.get_constraint_matrix_offsets().size(), + stream); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -458,13 +954,18 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons if (constraint_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_sense = - problem_and_stream_view->op_problem->get_row_types(); - raft::copy(constraint_sense_ptr, - constraint_sense.data(), - constraint_sense.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& row_types = problem_and_stream_view->cpu_data->row_types; + std::copy(row_types.begin(), row_types.end(), constraint_sense_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(constraint_sense_ptr, + gpu_problem.get_row_types().data(), + gpu_problem.get_row_types().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -475,10 +976,18 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem, if (rhs_ptr == nullptr) { return 
CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& rhs = - problem_and_stream_view->op_problem->get_constraint_bounds(); - raft::copy(rhs_ptr, rhs.data(), rhs.size(), problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_bounds; + std::copy(bounds.begin(), bounds.end(), rhs_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(rhs_ptr, + gpu_problem.get_constraint_bounds().data(), + gpu_problem.get_constraint_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -489,13 +998,18 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - problem_and_stream_view->op_problem->get_constraint_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_constraint_lower_bounds().data(), + gpu_problem.get_constraint_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -506,13 +1020,18 @@ cuopt_int_t cuOptGetConstraintUpperBounds(cuOptOptimizationProblem 
problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_constraint_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_constraint_upper_bounds().data(), + gpu_problem.get_constraint_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -523,13 +1042,18 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - problem_and_stream_view->op_problem->get_variable_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_variable_lower_bounds().data(), + gpu_problem.get_variable_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + 
gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -540,13 +1064,18 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_variable_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_variable_upper_bounds().data(), + gpu_problem.get_variable_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -556,17 +1085,24 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab if (variable_types_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& variable_types = - problem_and_stream_view->op_problem->get_variable_types(); - std::vector variable_types_host(variable_types.size()); - raft::copy(variable_types_host.data(), - variable_types.data(), - variable_types.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); - for (size_t j = 0; j < variable_types_host.size(); j++) { - variable_types_ptr[j] = - variable_types_host[j] == var_t::INTEGER ? 
CUOPT_INTEGER : CUOPT_CONTINUOUS; + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& var_types = problem_and_stream_view->cpu_data->variable_types; + std::copy(var_types.begin(), var_types.end(), variable_types_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto num_vars = gpu_problem.get_n_variables(); + std::vector gpu_var_types(num_vars); + raft::copy(gpu_var_types.data(), + gpu_problem.get_variable_types().data(), + num_vars, + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + // Convert from var_t enum to char + for (cuopt_int_t i = 0; i < num_vars; ++i) { + variable_types_ptr[i] = + (gpu_var_types[i] == var_t::CONTINUOUS) ? CUOPT_CONTINUOUS : CUOPT_INTEGER; + } } return CUOPT_SUCCESS; } @@ -712,10 +1248,7 @@ cuopt_int_t cuOptIsMIP(cuOptOptimizationProblem problem, cuopt_int_t* is_mip_ptr if (is_mip_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - bool is_mip = - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP) || - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP); - *is_mip_ptr = static_cast(is_mip); + *is_mip_ptr = static_cast(problem_and_stream_view->is_mip()); return CUOPT_SUCCESS; } @@ -728,44 +1261,97 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, if (problem == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (settings == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (solution_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } + problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP || - problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP) { - solver_settings_t* solver_settings = - static_cast*>(settings); - mip_solver_settings_t& 
mip_settings = - solver_settings->get_mip_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); - solution_and_stream_view->mip_solution_ptr = new mip_solution_t( - solve_mip(*op_problem, mip_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + solver_settings_t* solver_settings = + static_cast*>(settings); + + bool is_mip = problem_and_stream_view->is_mip(); + + // Use the view - solve_lp/solve_mip will check is_device_memory() to determine path + const auto& view = problem_and_stream_view->view; + + if (view.is_device_memory()) { + // Local path: data is already on GPU + // Use gpu_problem directly for optimal performance (no extra copy) + auto& gpu_problem = *problem_and_stream_view->gpu_problem; + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, problem_and_stream_view->handle.get()); + + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(gpu_problem, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, problem_and_stream_view->handle.get()); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp(gpu_problem, pdlp_settings)); + 
+ *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } else { - solver_settings_t* solver_settings = - static_cast*>(settings); - pdlp_solver_settings_t& pdlp_settings = - solver_settings->get_pdlp_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); - solution_and_stream_view->lp_solution_ptr = - new optimization_problem_solution_t( - solve_lp(*op_problem, pdlp_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + // CPU path: use view directly - solve_lp/solve_mip handle remote vs local conversion + // For remote solve, handle may be nullptr (no CUDA) + // For local solve with CPU data, handle will be created lazily + raft::handle_t* handle_ptr = + is_remote_solve_enabled() ? 
nullptr : problem_and_stream_view->get_handle_ptr(); + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, handle_ptr); + + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(handle_ptr, view, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, handle_ptr); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp(handle_ptr, view, pdlp_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } } @@ -856,24 +1442,34 @@ cuopt_int_t cuOptGetPrimalSolution(cuOptSolution solution, cuopt_float_t* soluti mip_solution_t* mip_solution = static_cast*>( solution_and_stream_view->mip_solution_ptr); - const rmm::device_uvector& solution_values = mip_solution->get_solution(); - rmm::cuda_stream_view stream_view{}; - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (mip_solution->is_device_memory()) { + const rmm::device_uvector& solution_values = mip_solution->get_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const 
std::vector& solution_values = mip_solution->get_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } else { optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& solution_values = - optimization_problem_solution->get_primal_solution(); - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& solution_values = + optimization_problem_solution->get_primal_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& solution_values = + optimization_problem_solution->get_primal_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } return CUOPT_SUCCESS; } @@ -964,13 +1560,19 @@ cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_sol optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& dual_solution = - optimization_problem_solution->get_dual_solution(); - raft::copy(dual_solution_ptr, - dual_solution.data(), - dual_solution.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& dual_solution = + optimization_problem_solution->get_dual_solution(); + raft::copy(dual_solution_ptr, + dual_solution.data(), + dual_solution.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } 
else { + const std::vector& dual_solution = + optimization_problem_solution->get_dual_solution_host(); + std::copy(dual_solution.begin(), dual_solution.end(), dual_solution_ptr); + } return CUOPT_SUCCESS; } } @@ -1005,13 +1607,19 @@ cuopt_int_t cuOptGetReducedCosts(cuOptSolution solution, cuopt_float_t* reduced_ optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& reduced_cost = - optimization_problem_solution->get_reduced_cost(); - raft::copy(reduced_cost_ptr, - reduced_cost.data(), - reduced_cost.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& reduced_cost = + optimization_problem_solution->get_reduced_cost(); + raft::copy(reduced_cost_ptr, + reduced_cost.data(), + reduced_cost.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& reduced_cost = + optimization_problem_solution->get_reduced_cost_host(); + std::copy(reduced_cost.begin(), reduced_cost.end(), reduced_cost_ptr); + } return CUOPT_SUCCESS; } } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index d038ade72..0f57a8660 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -40,7 +41,8 @@ #include #include -#include // For std::thread +#include // For std::memcpy +#include // For std::thread namespace cuopt::linear_programming { @@ -1040,6 +1042,91 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op return op_problem; } +// Helper to create a data_model_view_t from mps_data_model_t (for remote solve path) +template +static data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + 
view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + return view; +} + template optimization_problem_solution_t solve_lp( raft::handle_t const* handle_ptr, @@ -1048,34 +1135,350 @@ optimization_problem_solution_t solve_lp( bool problem_checking, bool use_pdlp_solver_mode) { - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); + // 
Create a view pointing to CPU data and delegate to the view-based overload. + // The view overload handles local vs remote solve automatically. + auto view = create_view_from_mps_data_model(mps_data_model); + view.set_is_device_memory(false); // MPS data is always in CPU memory + return solve_lp(handle_ptr, view, settings, problem_checking, use_pdlp_solver_mode); +} + +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view) +{ + optimization_problem_t op_problem(handle_ptr); + op_problem.set_maximize(view.get_sense()); + + op_problem.set_csr_constraint_matrix(view.get_constraint_matrix_values().data(), + view.get_constraint_matrix_values().size(), + view.get_constraint_matrix_indices().data(), + view.get_constraint_matrix_indices().size(), + view.get_constraint_matrix_offsets().data(), + view.get_constraint_matrix_offsets().size()); + + if (view.get_constraint_bounds().size() != 0) { + op_problem.set_constraint_bounds(view.get_constraint_bounds().data(), + view.get_constraint_bounds().size()); + } + if (view.get_objective_coefficients().size() != 0) { + op_problem.set_objective_coefficients(view.get_objective_coefficients().data(), + view.get_objective_coefficients().size()); + } + op_problem.set_objective_scaling_factor(view.get_objective_scaling_factor()); + op_problem.set_objective_offset(view.get_objective_offset()); + if (view.get_variable_lower_bounds().size() != 0) { + op_problem.set_variable_lower_bounds(view.get_variable_lower_bounds().data(), + view.get_variable_lower_bounds().size()); + } + if (view.get_variable_upper_bounds().size() != 0) { + op_problem.set_variable_upper_bounds(view.get_variable_upper_bounds().data(), + view.get_variable_upper_bounds().size()); + } + if (view.get_variable_types().size() != 0) { + auto var_types = view.get_variable_types(); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = 
cudaPointerGetAttributes(&attrs, var_types.data()); + + std::vector host_var_types(var_types.size()); + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(host_var_types.data(), + var_types.data(), + var_types.size() * sizeof(char), + cudaMemcpyDeviceToHost); + } else { + // Source is on host (or unregistered) - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(host_var_types.data(), var_types.data(), var_types.size() * sizeof(char)); + } + + std::vector enum_variable_types(var_types.size()); + for (std::size_t i = 0; i < var_types.size(); ++i) { + enum_variable_types[i] = host_var_types[i] == 'I' ? var_t::INTEGER : var_t::CONTINUOUS; + } + op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); + } + + if (view.get_row_types().size() != 0) { + op_problem.set_row_types(view.get_row_types().data(), view.get_row_types().size()); + } + if (view.get_constraint_lower_bounds().size() != 0) { + op_problem.set_constraint_lower_bounds(view.get_constraint_lower_bounds().data(), + view.get_constraint_lower_bounds().size()); + } + if (view.get_constraint_upper_bounds().size() != 0) { + op_problem.set_constraint_upper_bounds(view.get_constraint_upper_bounds().data(), + view.get_constraint_upper_bounds().size()); + } + + if (view.get_objective_name().size() != 0) { + op_problem.set_objective_name(view.get_objective_name()); + } + if (view.get_problem_name().size() != 0) { + op_problem.set_problem_name(view.get_problem_name().data()); + } + if (view.get_variable_names().size() != 0) { + op_problem.set_variable_names(view.get_variable_names()); + } + if (view.get_row_names().size() != 0) { op_problem.set_row_names(view.get_row_names()); } + + if (view.has_quadratic_objective()) { + // Copy quadratic objective from view to vectors first since we need host data + std::vector Q_values(view.get_quadratic_objective_values().size()); + std::vector 
Q_indices(view.get_quadratic_objective_indices().size()); + std::vector Q_offsets(view.get_quadratic_objective_offsets().size()); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = + cudaPointerGetAttributes(&attrs, view.get_quadratic_objective_values().data()); + + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + cudaMemcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + cudaMemcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + } else { + // Source is on host - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t)); + std::memcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t)); + std::memcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t)); + } + + op_problem.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + } + + return op_problem; +} + +// Helper struct to hold CPU copies of GPU data for remote solve +template +struct cpu_problem_data_t { + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + std::vector constraint_bounds; + std::vector constraint_lower_bounds; + std::vector constraint_upper_bounds; + std::vector objective_coefficients; + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + std::vector variable_types; + std::vector quadratic_objective_values; + std::vector 
quadratic_objective_indices; + std::vector quadratic_objective_offsets; + bool maximize; + f_t objective_scaling_factor; + f_t objective_offset; + + data_model_view_t create_view() const + { + data_model_view_t v; + v.set_maximize(maximize); + v.set_objective_scaling_factor(objective_scaling_factor); + v.set_objective_offset(objective_offset); + + if (!A_values.empty()) { + v.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + if (!constraint_bounds.empty()) { + v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) { + v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size()); + v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size()); + } + if (!objective_coefficients.empty()) { + v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + if (!variable_lower_bounds.empty()) { + v.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + if (!variable_upper_bounds.empty()) { + v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + if (!variable_types.empty()) { + v.set_variable_types(variable_types.data(), variable_types.size()); + } + if (!quadratic_objective_values.empty()) { + v.set_quadratic_objective_matrix(quadratic_objective_values.data(), + quadratic_objective_values.size(), + quadratic_objective_indices.data(), + quadratic_objective_indices.size(), + quadratic_objective_offsets.data(), + quadratic_objective_offsets.size()); + } + v.set_is_device_memory(false); + return v; + } +}; + +// Helper to copy GPU view data to CPU +template +cpu_problem_data_t copy_view_to_cpu(raft::handle_t const* handle_ptr, + const data_model_view_t& gpu_view) +{ + cpu_problem_data_t cpu_data; + auto 
stream = handle_ptr->get_stream(); + + cpu_data.maximize = gpu_view.get_sense(); + cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor(); + cpu_data.objective_offset = gpu_view.get_objective_offset(); + + auto copy_to_host = [stream](auto& dst_vec, auto src_span) { + if (src_span.size() > 0) { + dst_vec.resize(src_span.size()); + raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream); + } + }; + + copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values()); + copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices()); + copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets()); + copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds()); + copy_to_host(cpu_data.constraint_lower_bounds, gpu_view.get_constraint_lower_bounds()); + copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds()); + copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients()); + copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds()); + copy_to_host(cpu_data.variable_upper_bounds, gpu_view.get_variable_upper_bounds()); + copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values()); + copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices()); + copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets()); + + // Variable types need special handling (char array) + auto var_types_span = gpu_view.get_variable_types(); + if (var_types_span.size() > 0) { + cpu_data.variable_types.resize(var_types_span.size()); + cudaMemcpyAsync(cpu_data.variable_types.data(), + var_types_span.data(), + var_types_span.size() * sizeof(char), + cudaMemcpyDeviceToHost, + stream); + } + + // Synchronize to ensure all copies are complete + cudaStreamSynchronize(stream); + + return cpu_data; +} + +template +optimization_problem_solution_t 
solve_lp(raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings, + bool problem_checking, + bool use_pdlp_solver_mode) +{ + // Initialize logger for this overload (needed for early returns) + init_logger_t log(settings.log_file, settings.log_to_console); + + // Check for remote solve configuration first + auto remote_config = get_remote_solve_config(); + + if (view.is_device_memory()) { + if (remote_config.has_value()) { + // GPU data + remote solve requested: need valid handle to copy GPU→CPU + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR( + "[solve_lp] Remote solve requested with GPU data but no CUDA handle. " + "This is an internal error - GPU data should not exist without CUDA initialization."); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError); + } + CUOPT_LOG_WARN( + "[solve_lp] Remote solve requested but data is on GPU. " + "Copying to CPU for serialization (performance impact)."); + auto cpu_data = copy_view_to_cpu(handle_ptr, view); + auto cpu_view = cpu_data.create_view(); + + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Call the remote solve function with CPU-side view + return solve_lp_remote(*remote_config, cpu_view, settings); + } + + // Local solve: data already on GPU - convert view to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); + } + + // Data is on CPU + if (remote_config.has_value()) { + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Call the remote solve function + return solve_lp_remote(*remote_config, view, settings); + } + + // Local solve with CPU data: copy to GPU and solve + auto op_problem = 
data_model_view_to_optimization_problem(handle_ptr, view); return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } -#define INSTANTIATE(F_TYPE) \ - template optimization_problem_solution_t solve_lp( \ - optimization_problem_t& op_problem, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode, \ - bool is_batch_mode); \ - \ - template optimization_problem_solution_t solve_lp( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode); \ - \ - template optimization_problem_solution_t solve_lp_with_method( \ - detail::problem_t& problem, \ - pdlp_solver_settings_t const& settings, \ - const timer_t& timer, \ - bool is_batch_mode); \ - \ - template optimization_problem_t mps_data_model_to_optimization_problem( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& data_model); \ +#define INSTANTIATE(F_TYPE) \ + template optimization_problem_solution_t solve_lp( \ + optimization_problem_t& op_problem, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode, \ + bool is_batch_mode); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ + template optimization_problem_solution_t solve_lp_with_method( \ + detail::problem_t& problem, \ + pdlp_solver_settings_t const& settings, \ + const timer_t& timer, \ + bool is_batch_mode); \ + \ + template optimization_problem_t mps_data_model_to_optimization_problem( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& data_model); \ + \ + template optimization_problem_t 
data_model_view_to_optimization_problem( \ + raft::handle_t const* handle_ptr, const data_model_view_t& view); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const data_model_view_t& view, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ template void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index 1409e7cbf..45ed3dcd3 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -21,9 +21,10 @@ namespace cuopt::linear_programming { template optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { @@ -32,14 +33,41 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( cuopt::logic_error error_status_, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : 
primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(pdlp_termination_status_t::NoTermination), error_status_(error_status_) { } +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>()), + dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(termination_status), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + cuopt::logic_error error_status) + : primal_solution_host_(std::make_unique>()), + dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(pdlp_termination_status_t::NoTermination), + error_status_(error_status) +{ +} + template optimization_problem_solution_t::optimization_problem_solution_t( rmm::device_uvector& final_primal_solution, @@ -51,15 +79,16 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), pdlp_warm_start_data_(std::move(warm_start_data)), + termination_status_(termination_status), 
+ termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -74,14 +103,15 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), + termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -98,14 +128,42 @@ optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, const raft::handle_t* handler_ptr, [[maybe_unused]] bool deep_copy) - : primal_solution_(final_primal_solution, handler_ptr->get_stream()), - dual_solution_(final_dual_solution, handler_ptr->get_stream()), - reduced_cost_(final_reduced_cost, handler_ptr->get_stream()), + : primal_solution_( + std::make_unique>(final_primal_solution, handler_ptr->get_stream())), + dual_solution_( + std::make_unique>(final_dual_solution, handler_ptr->get_stream())), + reduced_cost_( + std::make_unique>(final_reduced_cost, handler_ptr->get_stream())), + is_device_memory_(true), + 
termination_status_(termination_status), + termination_stats_(termination_stats), objective_name_(objective_name), var_names_(var_names), row_names_(row_names), - termination_stats_(termination_stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve with solution data +template +optimization_problem_solution_t::optimization_problem_solution_t( + std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>(std::move(primal_solution))), + dual_solution_host_(std::make_unique>(std::move(dual_solution))), + reduced_cost_host_(std::make_unique>(std::move(reduced_cost))), + is_device_memory_(false), termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), + objective_name_(objective_name), + var_names_(var_names), + row_names_(row_names), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -114,31 +172,56 @@ template void optimization_problem_solution_t::copy_from( const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other) { - // Resize to make sure they are of same size - primal_solution_.resize(other.primal_solution_.size(), handle_ptr->get_stream()); - dual_solution_.resize(other.dual_solution_.size(), handle_ptr->get_stream()); - reduced_cost_.resize(other.reduced_cost_.size(), handle_ptr->get_stream()); - - // Copy the data - raft::copy(primal_solution_.data(), - other.primal_solution_.data(), - primal_solution_.size(), - handle_ptr->get_stream()); - raft::copy(dual_solution_.data(), - other.dual_solution_.data(), - dual_solution_.size(), - handle_ptr->get_stream()); - raft::copy(reduced_cost_.data(), - other.reduced_cost_.data(), - 
reduced_cost_.size(), - handle_ptr->get_stream()); + is_device_memory_ = other.is_device_memory_; + + if (other.is_device_memory_) { + // Copy GPU data + if (!primal_solution_) { + primal_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!dual_solution_) { + dual_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!reduced_cost_) { + reduced_cost_ = std::make_unique>(0, handle_ptr->get_stream()); + } + + // Resize to make sure they are of same size + primal_solution_->resize(other.primal_solution_->size(), handle_ptr->get_stream()); + dual_solution_->resize(other.dual_solution_->size(), handle_ptr->get_stream()); + reduced_cost_->resize(other.reduced_cost_->size(), handle_ptr->get_stream()); + + // Copy the data + raft::copy(primal_solution_->data(), + other.primal_solution_->data(), + primal_solution_->size(), + handle_ptr->get_stream()); + raft::copy(dual_solution_->data(), + other.dual_solution_->data(), + dual_solution_->size(), + handle_ptr->get_stream()); + raft::copy(reduced_cost_->data(), + other.reduced_cost_->data(), + reduced_cost_->size(), + handle_ptr->get_stream()); + handle_ptr->sync_stream(); + } else { + // Copy CPU data + if (!primal_solution_host_) { primal_solution_host_ = std::make_unique>(); } + if (!dual_solution_host_) { dual_solution_host_ = std::make_unique>(); } + if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique>(); } + + *primal_solution_host_ = *other.primal_solution_host_; + *dual_solution_host_ = *other.dual_solution_host_; + *reduced_cost_host_ = *other.reduced_cost_host_; + } + termination_stats_ = other.termination_stats_; termination_status_ = other.termination_status_; objective_name_ = other.objective_name_; var_names_ = other.var_names_; row_names_ = other.row_names_; // We do not copy the warm start info. As it is not needed for this purpose. 
- handle_ptr->sync_stream(); } template @@ -203,18 +286,31 @@ void optimization_problem_solution_t::write_to_file(std::string_view f << std::endl; return; } + std::vector primal_solution; std::vector dual_solution; std::vector reduced_cost; - primal_solution.resize(primal_solution_.size()); - dual_solution.resize(dual_solution_.size()); - reduced_cost.resize(reduced_cost_.size()); - raft::copy( - primal_solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - raft::copy( - dual_solution.data(), dual_solution_.data(), dual_solution_.size(), stream_view.value()); - raft::copy(reduced_cost.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + primal_solution.resize(primal_solution_->size()); + dual_solution.resize(dual_solution_->size()); + reduced_cost.resize(reduced_cost_->size()); + raft::copy(primal_solution.data(), + primal_solution_->data(), + primal_solution_->size(), + stream_view.value()); + raft::copy( + dual_solution.data(), dual_solution_->data(), dual_solution_->size(), stream_view.value()); + raft::copy( + reduced_cost.data(), reduced_cost_->data(), reduced_cost_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + primal_solution = *primal_solution_host_; + dual_solution = *dual_solution_host_; + reduced_cost = *reduced_cost_host_; + } myfile << "{ " << std::endl; myfile << "\t\"Termination reason\" : \"" << get_termination_status_string() << "\"," @@ -305,35 +401,78 @@ f_t optimization_problem_solution_t::get_dual_objective_value() const return termination_stats_.dual_objective; } +template +bool optimization_problem_solution_t::is_device_memory() const +{ + return is_device_memory_; +} + template rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() { - return primal_solution_; + return 
*primal_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() const { - return primal_solution_; + return *primal_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() { - return dual_solution_; + return *dual_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() const { - return dual_solution_; + return *dual_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() { - return reduced_cost_; + return *reduced_cost_; +} + +// Host (CPU) getters +template +std::vector& optimization_problem_solution_t::get_primal_solution_host() +{ + return *primal_solution_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_primal_solution_host() const +{ + return *primal_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_dual_solution_host() +{ + return *dual_solution_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_dual_solution_host() const +{ + return *dual_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_reduced_cost_host() +{ + return *reduced_cost_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_reduced_cost_host() const +{ + return *reduced_cost_host_; } template @@ -362,6 +501,136 @@ optimization_problem_solution_t::get_pdlp_warm_start_data() return pdlp_warm_start_data_; } +//============================================================================ +// Setters for host solution data +//============================================================================ + +template +void optimization_problem_solution_t::set_primal_solution_host(std::vector solution) +{ + primal_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_dual_solution_host(std::vector 
solution) +{ + dual_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_reduced_cost_host(std::vector reduced_cost) +{ + reduced_cost_host_ = std::make_unique>(std::move(reduced_cost)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_termination_stats( + const additional_termination_information_t& stats) +{ + termination_stats_ = stats; +} + +//============================================================================ +// Getters for termination statistics +//============================================================================ + +template +f_t optimization_problem_solution_t::get_l2_primal_residual() const +{ + return termination_stats_.l2_primal_residual; +} + +template +f_t optimization_problem_solution_t::get_l2_dual_residual() const +{ + return termination_stats_.l2_dual_residual; +} + +template +f_t optimization_problem_solution_t::get_primal_objective() const +{ + return termination_stats_.primal_objective; +} + +template +f_t optimization_problem_solution_t::get_dual_objective() const +{ + return termination_stats_.dual_objective; +} + +template +f_t optimization_problem_solution_t::get_gap() const +{ + return termination_stats_.gap; +} + +template +i_t optimization_problem_solution_t::get_nb_iterations() const +{ + return termination_stats_.number_of_steps_taken; +} + +template +bool optimization_problem_solution_t::get_solved_by_pdlp() const +{ + return solved_by_pdlp_; +} + +//============================================================================ +// Setters for termination statistics +//============================================================================ + +template +void optimization_problem_solution_t::set_l2_primal_residual(f_t value) +{ + termination_stats_.l2_primal_residual = value; +} + +template +void optimization_problem_solution_t::set_l2_dual_residual(f_t value) +{ + 
termination_stats_.l2_dual_residual = value; +} + +template +void optimization_problem_solution_t::set_primal_objective(f_t value) +{ + termination_stats_.primal_objective = value; +} + +template +void optimization_problem_solution_t::set_dual_objective(f_t value) +{ + termination_stats_.dual_objective = value; +} + +template +void optimization_problem_solution_t::set_gap(f_t value) +{ + termination_stats_.gap = value; +} + +template +void optimization_problem_solution_t::set_nb_iterations(i_t value) +{ + termination_stats_.number_of_steps_taken = value; +} + +template +void optimization_problem_solution_t::set_solved_by_pdlp(bool value) +{ + solved_by_pdlp_ = value; +} + +template +std::string optimization_problem_solution_t::get_error_string() const +{ + return error_status_.what(); +} + template void optimization_problem_solution_t::write_to_sol_file( std::string_view filename, rmm::cuda_stream_view stream_view) const @@ -374,14 +643,73 @@ void optimization_problem_solution_t::write_to_sol_file( auto objective_value = get_objective_value(); std::vector solution; - solution.resize(primal_solution_.size()); - raft::copy( - solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + solution.resize(primal_solution_->size()); + raft::copy( + solution.data(), primal_solution_->data(), primal_solution_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + solution = *primal_solution_host_; + } + solution_writer_t::write_solution_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } +template +void optimization_problem_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (!is_device_memory_) { + // Already on CPU, nothing to do + return; + } + + // Initialize host storage if needed + if (!primal_solution_host_) { 
primal_solution_host_ = std::make_unique>(); } + if (!dual_solution_host_) { dual_solution_host_ = std::make_unique>(); } + if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique>(); } + + // Copy primal solution + if (primal_solution_ && primal_solution_->size() > 0) { + primal_solution_host_->resize(primal_solution_->size()); + raft::copy(primal_solution_host_->data(), + primal_solution_->data(), + primal_solution_->size(), + stream_view.value()); + } + + // Copy dual solution + if (dual_solution_ && dual_solution_->size() > 0) { + dual_solution_host_->resize(dual_solution_->size()); + raft::copy(dual_solution_host_->data(), + dual_solution_->data(), + dual_solution_->size(), + stream_view.value()); + } + + // Copy reduced cost + if (reduced_cost_ && reduced_cost_->size() > 0) { + reduced_cost_host_->resize(reduced_cost_->size()); + raft::copy(reduced_cost_host_->data(), + reduced_cost_->data(), + reduced_cost_->size(), + stream_view.value()); + } + + // Synchronize to ensure copies are complete + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + // Clear GPU storage to free memory + primal_solution_.reset(); + dual_solution_.reset(); + reduced_cost_.reset(); + + is_device_memory_ = false; +} + #if MIP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto new file mode 100644 index 000000000..354d9d1b5 --- /dev/null +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -0,0 +1,370 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +syntax = "proto3"; + +package cuopt.remote; + +// Protocol version and metadata +message RequestHeader { + uint32 version = 1; // Protocol version (currently 1) + ProblemType problem_type = 2; // LP or MIP + IndexType index_type = 3; // INT32 or INT64 + FloatType float_type = 4; // FLOAT32 or DOUBLE +} + +enum ProblemType { + LP = 0; + MIP = 1; +} + +enum IndexType { + INT32 = 0; + INT64 = 1; +} + +enum FloatType { + FLOAT32 = 0; + DOUBLE = 1; +} + +// Optimization problem representation (field names match data_model_view_t) +message OptimizationProblem { + // Problem metadata + string problem_name = 1; + string objective_name = 2; + bool maximize = 3; + double objective_scaling_factor = 4; + double objective_offset = 5; + // Field 6 reserved (was problem_category, now inferred from variable_types) + + // Variable and row names (optional) + repeated string variable_names = 7; + repeated string row_names = 8; + + // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) + repeated double A = 10; + repeated int32 A_indices = 11; + repeated int32 A_offsets = 12; + + // Problem vectors (names match data_model_view_t: c_, b_) + repeated double c = 20; // objective coefficients + repeated double b = 21; // constraint bounds (RHS) + repeated double variable_lower_bounds = 22; + repeated double variable_upper_bounds = 23; + + // Constraint bounds (alternative to b + row_types) + repeated double constraint_lower_bounds = 24; + repeated double constraint_upper_bounds = 25; + bytes row_types = 26; // char array: 'E' (=), 'L' (<=), 'G' (>=), 'N' (objective) + + // Variable types (matches data_model_view_t: variable_types_) + bytes variable_types = 30; // char array: 'C' (continuous), 'I' (integer), 'B' (binary) + + // Initial solutions (matches data_model_view_t) + repeated double initial_primal_solution = 40; + repeated double initial_dual_solution = 41; + + // Quadratic objective matrix Q in 
CSR format for QPS (matches data_model_view_t: Q_objective_) + repeated double Q_values = 50; + repeated int32 Q_indices = 51; + repeated int32 Q_offsets = 52; +} + +// PDLP solver mode enum (matches cuOpt pdlp_solver_mode_t) +// Matches cuOpt pdlp_solver_mode_t enum values +enum PDLPSolverMode { + Stable1 = 0; + Stable2 = 1; + Methodical1 = 2; + Fast1 = 3; + Stable3 = 4; +} + +// Matches cuOpt method_t enum values +enum LPMethod { + Concurrent = 0; + PDLP = 1; + DualSimplex = 2; + Barrier = 3; +} + +// PDLP solver settings (field names match cuOpt Python/C++ API) +message PDLPSolverSettings { + // Termination tolerances + double absolute_gap_tolerance = 1; + double relative_gap_tolerance = 2; + double primal_infeasible_tolerance = 3; + double dual_infeasible_tolerance = 4; + double absolute_dual_tolerance = 5; + double relative_dual_tolerance = 6; + double absolute_primal_tolerance = 7; + double relative_primal_tolerance = 8; + + // Limits + double time_limit = 10; + int64 iteration_limit = 11; + + // Solver configuration + bool log_to_console = 20; + bool detect_infeasibility = 21; + bool strict_infeasibility = 22; + PDLPSolverMode pdlp_solver_mode = 23; + LPMethod method = 24; + bool presolve = 25; + bool dual_postsolve = 26; + bool crossover = 27; + int32 num_gpus = 28; + + // Advanced options + bool per_constraint_residual = 30; + bool cudss_deterministic = 31; + int32 folding = 32; + int32 augmented = 33; + int32 dualize = 34; + int32 ordering = 35; + int32 barrier_dual_initial_point = 36; + bool eliminate_dense_columns = 37; + bool save_best_primal_so_far = 38; + bool first_primal_feasible = 39; + + // Warm start data (if provided) + PDLPWarmStartData warm_start_data = 50; +} + +message PDLPWarmStartData { + repeated double current_primal_solution = 1; + repeated double current_dual_solution = 2; + repeated double initial_primal_average = 3; + repeated double initial_dual_average = 4; + repeated double current_ATY = 5; + repeated double sum_primal_solutions 
= 6; + repeated double sum_dual_solutions = 7; + repeated double last_restart_duality_gap_primal_solution = 8; + repeated double last_restart_duality_gap_dual_solution = 9; + + double initial_primal_weight = 10; + double initial_step_size = 11; + int32 total_pdlp_iterations = 12; + int32 total_pdhg_iterations = 13; + double last_candidate_kkt_score = 14; + double last_restart_kkt_score = 15; + double sum_solution_weight = 16; + int32 iterations_since_last_restart = 17; +} + +// MIP solver settings (field names match cuOpt Python/C++ API) +message MIPSolverSettings { + // Limits + double time_limit = 1; + + // Tolerances + double relative_mip_gap = 2; + double absolute_mip_gap = 3; + double integrality_tolerance = 4; + double absolute_tolerance = 5; + double relative_tolerance = 6; + double presolve_absolute_tolerance = 7; + + // Solver configuration + bool log_to_console = 10; + bool heuristics_only = 11; + int32 num_cpu_threads = 12; + int32 num_gpus = 13; + bool presolve = 14; + bool mip_scaling = 15; +} + +// LP solve request +message SolveLPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + PDLPSolverSettings settings = 3; +} + +// MIP solve request +message SolveMIPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + MIPSolverSettings settings = 3; +} + +// LP solution +message LPSolution { + // Solution vectors + repeated double primal_solution = 1; + repeated double dual_solution = 2; + repeated double reduced_cost = 3; + + // Warm start data for next solve + PDLPWarmStartData warm_start_data = 4; + + // Termination information + PDLPTerminationStatus termination_status = 10; + string error_message = 11; + + // Solution statistics + double l2_primal_residual = 20; + double l2_dual_residual = 21; + double primal_objective = 22; + double dual_objective = 23; + double gap = 24; + int32 nb_iterations = 25; + double solve_time = 26; + bool solved_by_pdlp = 27; +} + +enum PDLPTerminationStatus { + PDLP_NO_TERMINATION = 
0; + PDLP_NUMERICAL_ERROR = 1; + PDLP_OPTIMAL = 2; + PDLP_PRIMAL_INFEASIBLE = 3; + PDLP_DUAL_INFEASIBLE = 4; + PDLP_ITERATION_LIMIT = 5; + PDLP_TIME_LIMIT = 6; + PDLP_CONCURRENT_LIMIT = 7; + PDLP_PRIMAL_FEASIBLE = 8; +} + +// MIP solution +message MIPSolution { + repeated double solution = 1; + + MIPTerminationStatus termination_status = 10; + string error_message = 11; + + double objective = 20; + double mip_gap = 21; + double solution_bound = 22; + double total_solve_time = 23; + double presolve_time = 24; + double max_constraint_violation = 25; + double max_int_violation = 26; + double max_variable_bound_violation = 27; + int32 nodes = 28; + int32 simplex_iterations = 29; +} + +enum MIPTerminationStatus { + MIP_NO_TERMINATION = 0; + MIP_OPTIMAL = 1; + MIP_FEASIBLE_FOUND = 2; + MIP_INFEASIBLE = 3; + MIP_UNBOUNDED = 4; + MIP_TIME_LIMIT = 5; +} + +// Request types for async operations +enum RequestType { + SUBMIT_JOB = 0; // Submit a new solve job + CHECK_STATUS = 1; // Check job status + GET_RESULT = 2; // Retrieve completed result + DELETE_RESULT = 3; // Delete result from server + GET_LOGS = 4; // Retrieve buffered log entries + CANCEL_JOB = 5; // Cancel a queued or running job + WAIT_FOR_RESULT = 6; // Block until job completes, returns result +} + +// Job status for async operations +enum JobStatus { + QUEUED = 0; // Job submitted, waiting in queue + PROCESSING = 1; // Job currently being solved + COMPLETED = 2; // Job completed successfully + FAILED = 3; // Job failed with error + NOT_FOUND = 4; // Job ID not found + CANCELLED = 5; // Job was cancelled by user +} + +// Generic request wrapper for async operations +message AsyncRequest { + RequestType request_type = 1; + bytes job_id = 2; // For status/get/delete/get_logs operations (bytes to avoid UTF-8 validation warnings) + bool blocking = 3; // If true, server waits and returns solution (sync mode) + int64 frombyte = 4; // For GET_LOGS: byte offset to start reading from + + // For SUBMIT_JOB requests + 
oneof job_data { + SolveLPRequest lp_request = 10; + SolveMIPRequest mip_request = 11; + } +} + +// Response for job submission +message SubmitResponse { + ResponseStatus status = 1; + bytes job_id = 2; // Unique job identifier (bytes to avoid UTF-8 validation warnings) + string message = 3; // Success/error message +} + +// Response for status check +message StatusResponse { + JobStatus job_status = 1; + string message = 2; + double progress = 3; // 0.0-1.0 (future enhancement) +} + +// Response for get result +message ResultResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +// Response for delete +message DeleteResponse { + ResponseStatus status = 1; + string message = 2; +} + +// Response for log retrieval (file-based, like Python server) +message LogsResponse { + ResponseStatus status = 1; + bytes job_id = 2; // (bytes to avoid UTF-8 validation warnings) + repeated string log_lines = 3; // Log lines read from file + int64 nbytes = 4; // Ending byte position (use as frombyte in next request) + bool job_exists = 5; // False if job_id not found +} + +// Response for cancel job +message CancelResponse { + ResponseStatus status = 1; + string message = 2; + JobStatus job_status = 3; // Status of job after cancel attempt +} + +// Generic response wrapper +message AsyncResponse { + RequestType request_type = 1; + + oneof response_data { + SubmitResponse submit_response = 10; + StatusResponse status_response = 11; + ResultResponse result_response = 12; + DeleteResponse delete_response = 13; + LogsResponse logs_response = 14; + CancelResponse cancel_response = 15; + } +} + +// Legacy synchronous response (for backward compatibility) +message SolveResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +enum ResponseStatus { + SUCCESS = 0; + 
ERROR_INVALID_REQUEST = 1; + ERROR_SOLVE_FAILED = 2; + ERROR_INTERNAL = 3; + ERROR_NOT_FOUND = 4; +} diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 0e1dbc6af..f0dd24706 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -142,47 +143,58 @@ linear_programming_ret_t call_solve_lp( const bool use_pdlp_solver_mode = true; auto solution = cuopt::linear_programming::solve_lp( op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); - linear_programming_ret_t lp_ret{ - std::make_unique(solution.get_primal_solution().release()), - std::make_unique(solution.get_dual_solution().release()), - std::make_unique(solution.get_reduced_cost().release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_dual_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_primal_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_dual_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_ATY_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()), - solution.get_pdlp_warm_start_data().initial_primal_weight_, - solution.get_pdlp_warm_start_data().initial_step_size_, - solution.get_pdlp_warm_start_data().total_pdlp_iterations_, - 
solution.get_pdlp_warm_start_data().total_pdhg_iterations_, - solution.get_pdlp_warm_start_data().last_candidate_kkt_score_, - solution.get_pdlp_warm_start_data().last_restart_kkt_score_, - solution.get_pdlp_warm_start_data().sum_solution_weight_, - solution.get_pdlp_warm_start_data().iterations_since_last_restart_, - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_additional_termination_information().l2_primal_residual, - solution.get_additional_termination_information().l2_dual_residual, - solution.get_additional_termination_information().primal_objective, - solution.get_additional_termination_information().dual_objective, - solution.get_additional_termination_information().gap, - solution.get_additional_termination_information().number_of_steps_taken, - solution.get_additional_termination_information().solve_time, - solution.get_additional_termination_information().solved_by_pdlp}; + + linear_programming_ret_t lp_ret; + + // GPU data (local solve always uses GPU) + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + + // Warm start data + lp_ret.current_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_primal_solution_.release()); + lp_ret.current_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_dual_solution_.release()); + lp_ret.initial_primal_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_primal_average_.release()); + lp_ret.initial_dual_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_dual_average_.release()); + lp_ret.current_ATY_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_ATY_.release()); + 
lp_ret.sum_primal_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()); + lp_ret.sum_dual_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()); + lp_ret.last_restart_duality_gap_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()); + lp_ret.last_restart_duality_gap_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()); + lp_ret.initial_primal_weight_ = solution.get_pdlp_warm_start_data().initial_primal_weight_; + lp_ret.initial_step_size_ = solution.get_pdlp_warm_start_data().initial_step_size_; + lp_ret.total_pdlp_iterations_ = solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + lp_ret.total_pdhg_iterations_ = solution.get_pdlp_warm_start_data().total_pdhg_iterations_; + lp_ret.last_candidate_kkt_score_ = solution.get_pdlp_warm_start_data().last_candidate_kkt_score_; + lp_ret.last_restart_kkt_score_ = solution.get_pdlp_warm_start_data().last_restart_kkt_score_; + lp_ret.sum_solution_weight_ = solution.get_pdlp_warm_start_data().sum_solution_weight_; + lp_ret.iterations_since_last_restart_ = + solution.get_pdlp_warm_start_data().iterations_since_last_restart_; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = solution.get_additional_termination_information().l2_primal_residual; + lp_ret.l2_dual_residual_ = solution.get_additional_termination_information().l2_dual_residual; + lp_ret.primal_objective_ = solution.get_additional_termination_information().primal_objective; + lp_ret.dual_objective_ = solution.get_additional_termination_information().dual_objective; + lp_ret.gap_ = solution.get_additional_termination_information().gap; + 
lp_ret.nb_iterations_ = solution.get_additional_termination_information().number_of_steps_taken; + lp_ret.solve_time_ = solution.get_additional_termination_information().solve_time; + lp_ret.solved_by_pdlp_ = solution.get_additional_termination_information().solved_by_pdlp; return lp_ret; } @@ -205,20 +217,24 @@ mip_ret_t call_solve_mip( error_type_t::ValidationError, "MIP solve cannot be called on an LP problem!"); auto solution = cuopt::linear_programming::solve_mip(op_problem, solver_settings); - mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_objective_value(), - solution.get_mip_gap(), - solution.get_solution_bound(), - solution.get_total_solve_time(), - solution.get_presolve_time(), - solution.get_max_constraint_violation(), - solution.get_max_int_violation(), - solution.get_max_variable_bound_violation(), - solution.get_num_nodes(), - solution.get_num_simplex_iterations()}; + + mip_ret_t mip_ret; + mip_ret.solution_ = std::make_unique(solution.get_solution().release()); + mip_ret.is_device_memory_ = true; + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = 
solution.get_num_simplex_iterations(); + return mip_ret; } @@ -228,15 +244,132 @@ std::unique_ptr call_solve( unsigned int flags, bool is_batch_mode) { + // Check if remote solve is configured FIRST (before any CUDA operations) + if (linear_programming::is_remote_solve_enabled()) { + // Data coming from Python is in CPU memory - mark it as such + data_model->set_is_device_memory(false); + + solver_ret_t response; + + // Determine if LP or MIP based on variable types + bool is_mip = false; + auto var_types = data_model->get_variable_types(); + for (size_t i = 0; i < var_types.size(); ++i) { + if (var_types.data()[i] != 'C') { + is_mip = true; + break; + } + } + + if (!is_mip) { + // LP: call solve_lp with nullptr handle - remote solve doesn't need GPU + auto solution = + linear_programming::solve_lp(nullptr, *data_model, solver_settings->get_pdlp_settings()); + + // Convert solution to linear_programming_ret_t + auto term_info = solution.get_additional_termination_information(); + linear_programming_ret_t lp_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve - initialize empty device buffers for Python compatibility + lp_ret.primal_solution_ = std::make_unique(); + lp_ret.dual_solution_ = std::make_unique(); + lp_ret.reduced_cost_ = std::make_unique(); + lp_ret.primal_solution_host_ = std::move(solution.get_primal_solution_host()); + lp_ret.dual_solution_host_ = std::move(solution.get_dual_solution_host()); + lp_ret.reduced_cost_host_ = std::move(solution.get_reduced_cost_host()); + lp_ret.is_device_memory_ = false; + } + + // Warm start data - create empty buffers to 
avoid null pointer issues in Python wrapper + lp_ret.current_primal_solution_ = std::make_unique(); + lp_ret.current_dual_solution_ = std::make_unique(); + lp_ret.initial_primal_average_ = std::make_unique(); + lp_ret.initial_dual_average_ = std::make_unique(); + lp_ret.current_ATY_ = std::make_unique(); + lp_ret.sum_primal_solutions_ = std::make_unique(); + lp_ret.sum_dual_solutions_ = std::make_unique(); + lp_ret.last_restart_duality_gap_primal_solution_ = std::make_unique(); + lp_ret.last_restart_duality_gap_dual_solution_ = std::make_unique(); + lp_ret.initial_primal_weight_ = 0.0; + lp_ret.initial_step_size_ = 0.0; + lp_ret.total_pdlp_iterations_ = 0; + lp_ret.total_pdhg_iterations_ = 0; + lp_ret.last_candidate_kkt_score_ = 0.0; + lp_ret.last_restart_kkt_score_ = 0.0; + lp_ret.sum_solution_weight_ = 0.0; + lp_ret.iterations_since_last_restart_ = 0; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = term_info.l2_primal_residual; + lp_ret.l2_dual_residual_ = term_info.l2_dual_residual; + lp_ret.primal_objective_ = term_info.primal_objective; + lp_ret.dual_objective_ = term_info.dual_objective; + lp_ret.gap_ = term_info.gap; + lp_ret.nb_iterations_ = term_info.number_of_steps_taken; + lp_ret.solve_time_ = solution.get_solve_time(); + lp_ret.solved_by_pdlp_ = false; + + response.lp_ret = std::move(lp_ret); + response.problem_type = linear_programming::problem_category_t::LP; + } else { + // MIP: call solve_mip with nullptr handle - remote solve doesn't need GPU + auto solution = + linear_programming::solve_mip(nullptr, *data_model, solver_settings->get_mip_settings()); + + mip_ret_t mip_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + mip_ret.solution_ = std::make_unique(solution.get_solution().release()); + 
mip_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve - initialize empty device buffer for Python compatibility + mip_ret.solution_ = std::make_unique(); + mip_ret.solution_host_ = std::move(solution.get_solution_host()); + mip_ret.is_device_memory_ = false; + } + + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = solution.get_num_simplex_iterations(); + + response.mip_ret = std::move(mip_ret); + response.problem_type = linear_programming::problem_category_t::MIP; + } + + return std::make_unique(std::move(response)); + } + + // Local solve: create CUDA resources only when needed raft::common::nvtx::range fun_scope("Call Solve"); // FIX: Use default handle constructor like CLI does, instead of explicit stream creation // Original code created a non-blocking stream which causes synchronization issues with PDLP - // This is a workaround to fix the synchronization issues, please fix this in the future and - // remove this workaround. 
cudaStream_t stream; RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, - // flags)); // flags=cudaStreamNonBlocking const raft::handle_t handle_{stream}; const raft::handle_t handle_{}; + // Local solve: proceed as before - create GPU problem and solve auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); solver_ret_t response; if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) { diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu new file mode 100644 index 000000000..ad3c2704c --- /dev/null +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -0,0 +1,1520 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// Convert cuOpt termination status to protobuf enum +cuopt::remote::PDLPTerminationStatus to_proto_status(pdlp_termination_status_t status) +{ + switch (status) { + case pdlp_termination_status_t::NoTermination: return cuopt::remote::PDLP_NO_TERMINATION; + case pdlp_termination_status_t::NumericalError: return cuopt::remote::PDLP_NUMERICAL_ERROR; + case pdlp_termination_status_t::Optimal: return cuopt::remote::PDLP_OPTIMAL; + case pdlp_termination_status_t::PrimalInfeasible: return cuopt::remote::PDLP_PRIMAL_INFEASIBLE; + case pdlp_termination_status_t::DualInfeasible: return cuopt::remote::PDLP_DUAL_INFEASIBLE; + case pdlp_termination_status_t::IterationLimit: return cuopt::remote::PDLP_ITERATION_LIMIT; + case pdlp_termination_status_t::TimeLimit: return cuopt::remote::PDLP_TIME_LIMIT; + case pdlp_termination_status_t::ConcurrentLimit: return cuopt::remote::PDLP_CONCURRENT_LIMIT; + case pdlp_termination_status_t::PrimalFeasible: return 
cuopt::remote::PDLP_PRIMAL_FEASIBLE; + default: return cuopt::remote::PDLP_NO_TERMINATION; + } +} + +// Convert protobuf enum to cuOpt termination status +pdlp_termination_status_t from_proto_status(cuopt::remote::PDLPTerminationStatus status) +{ + switch (status) { + case cuopt::remote::PDLP_NO_TERMINATION: return pdlp_termination_status_t::NoTermination; + case cuopt::remote::PDLP_NUMERICAL_ERROR: return pdlp_termination_status_t::NumericalError; + case cuopt::remote::PDLP_OPTIMAL: return pdlp_termination_status_t::Optimal; + case cuopt::remote::PDLP_PRIMAL_INFEASIBLE: return pdlp_termination_status_t::PrimalInfeasible; + case cuopt::remote::PDLP_DUAL_INFEASIBLE: return pdlp_termination_status_t::DualInfeasible; + case cuopt::remote::PDLP_ITERATION_LIMIT: return pdlp_termination_status_t::IterationLimit; + case cuopt::remote::PDLP_TIME_LIMIT: return pdlp_termination_status_t::TimeLimit; + case cuopt::remote::PDLP_CONCURRENT_LIMIT: return pdlp_termination_status_t::ConcurrentLimit; + case cuopt::remote::PDLP_PRIMAL_FEASIBLE: return pdlp_termination_status_t::PrimalFeasible; + default: return pdlp_termination_status_t::NoTermination; + } +} + +// Convert MIP termination status +cuopt::remote::MIPTerminationStatus to_proto_mip_status(mip_termination_status_t status) +{ + switch (status) { + case mip_termination_status_t::NoTermination: return cuopt::remote::MIP_NO_TERMINATION; + case mip_termination_status_t::Optimal: return cuopt::remote::MIP_OPTIMAL; + case mip_termination_status_t::FeasibleFound: return cuopt::remote::MIP_FEASIBLE_FOUND; + case mip_termination_status_t::Infeasible: return cuopt::remote::MIP_INFEASIBLE; + case mip_termination_status_t::Unbounded: return cuopt::remote::MIP_UNBOUNDED; + case mip_termination_status_t::TimeLimit: return cuopt::remote::MIP_TIME_LIMIT; + default: return cuopt::remote::MIP_NO_TERMINATION; + } +} + +mip_termination_status_t from_proto_mip_status(cuopt::remote::MIPTerminationStatus status) +{ + switch (status) { + case 
cuopt::remote::MIP_NO_TERMINATION: return mip_termination_status_t::NoTermination; + case cuopt::remote::MIP_OPTIMAL: return mip_termination_status_t::Optimal; + case cuopt::remote::MIP_FEASIBLE_FOUND: return mip_termination_status_t::FeasibleFound; + case cuopt::remote::MIP_INFEASIBLE: return mip_termination_status_t::Infeasible; + case cuopt::remote::MIP_UNBOUNDED: return mip_termination_status_t::Unbounded; + case cuopt::remote::MIP_TIME_LIMIT: return mip_termination_status_t::TimeLimit; + default: return mip_termination_status_t::NoTermination; + } +} + +} // namespace + +/** + * @brief Default Protocol Buffers serializer implementation. + */ +template +class protobuf_serializer_t : public remote_serializer_t { + public: + using job_status_t = typename remote_serializer_t::job_status_t; + + protobuf_serializer_t() = default; + ~protobuf_serializer_t() override = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + cuopt::remote::SolveLPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize settings + serialize_lp_settings_to_proto(settings, request.mutable_settings()); + + // Serialize to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + cuopt::remote::SolveMIPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::MIP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize all MIP settings (names match cuOpt API) + auto* pb_settings = request.mutable_settings(); + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_relative_mip_gap(settings.tolerances.relative_mip_gap); + pb_settings->set_absolute_mip_gap(settings.tolerances.absolute_mip_gap); + pb_settings->set_integrality_tolerance(settings.tolerances.integrality_tolerance); + pb_settings->set_absolute_tolerance(settings.tolerances.absolute_tolerance); + pb_settings->set_relative_tolerance(settings.tolerances.relative_tolerance); + pb_settings->set_presolve_absolute_tolerance(settings.tolerances.presolve_absolute_tolerance); + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_heuristics_only(settings.heuristics_only); + pb_settings->set_num_cpu_threads(settings.num_cpu_threads); + pb_settings->set_num_gpus(settings.num_gpus); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_mip_scaling(settings.mip_scaling); + + // Serialize 
to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Solution Deserialization + //============================================================================ + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + cuopt::remote::LPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return optimization_problem_solution_t( + cuopt::logic_error("Failed to parse LP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_lp_solution(pb_solution); + } + + mip_solution_t deserialize_mip_solution(const std::vector& data) override + { + cuopt::remote::MIPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return mip_solution_t( + cuopt::logic_error("Failed to parse MIP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_mip_solution(pb_solution); + } + + //============================================================================ + // Server-side Operations + //============================================================================ + + bool is_mip_request(const std::vector& data) override + { + // Try to parse as async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size())) { + if (async_request.has_mip_request()) { return true; } + if (async_request.has_lp_request()) { return false; } + } + + // Try to parse as direct request and check the header's problem_type + // MIP request - check if header indicates MIP + cuopt::remote::SolveMIPRequest mip_request; + if (mip_request.ParseFromArray(data.data(), data.size()) && mip_request.has_header()) { + if (mip_request.header().problem_type() == cuopt::remote::MIP) { return true; } + } + + // LP request - check if header indicates LP + 
cuopt::remote::SolveLPRequest lp_request; + if (lp_request.ParseFromArray(data.data(), data.size()) && lp_request.has_header()) { + if (lp_request.header().problem_type() == cuopt::remote::LP) { return false; } + } + + return false; // Default to LP if can't determine + } + + bool deserialize_lp_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + // Try async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size()) && async_request.has_lp_request()) { + const auto& lp_request = async_request.lp_request(); + proto_to_mps_data(lp_request.problem(), mps_data); + proto_to_lp_settings(lp_request.settings(), settings); + return true; + } + + // Try direct LP request + cuopt::remote::SolveLPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse LP request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + proto_to_lp_settings(request.settings(), settings); + return true; + } + + bool deserialize_mip_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Try async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size()) && async_request.has_mip_request()) { + const auto& mip_request = async_request.mip_request(); + proto_to_mps_data(mip_request.problem(), mps_data); + proto_to_mip_settings(mip_request.settings(), settings); + return true; + } + + // Try direct MIP request + cuopt::remote::SolveMIPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse MIP request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + proto_to_mip_settings(request.settings(), settings); + return true; + } + + 
std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + cuopt::remote::LPSolution pb_solution; + lp_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_solution(const mip_solution_t& solution) override + { + cuopt::remote::MIPSolution pb_solution; + mip_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Async Operations + //============================================================================ + + std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::SUBMIT_JOB); + request.set_blocking(blocking); + + auto* lp_request = request.mutable_lp_request(); + + // Set header + auto* header = lp_request->mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + serialize_problem_to_proto(view, lp_request->mutable_problem()); + serialize_lp_settings_to_proto(settings, lp_request->mutable_settings()); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::SUBMIT_JOB); + request.set_blocking(blocking); + + auto* mip_request = request.mutable_mip_request(); + + // Set header + auto* header = mip_request->mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::MIP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + serialize_problem_to_proto(view, mip_request->mutable_problem()); + + // Serialize all MIP settings (names match cuOpt API) + auto* pb_settings = mip_request->mutable_settings(); + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_relative_mip_gap(settings.tolerances.relative_mip_gap); + pb_settings->set_absolute_mip_gap(settings.tolerances.absolute_mip_gap); + pb_settings->set_integrality_tolerance(settings.tolerances.integrality_tolerance); + pb_settings->set_absolute_tolerance(settings.tolerances.absolute_tolerance); + pb_settings->set_relative_tolerance(settings.tolerances.relative_tolerance); + pb_settings->set_presolve_absolute_tolerance(settings.tolerances.presolve_absolute_tolerance); + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_heuristics_only(settings.heuristics_only); + pb_settings->set_num_cpu_threads(settings.num_cpu_threads); + pb_settings->set_num_gpus(settings.num_gpus); + 
pb_settings->set_presolve(settings.presolve); + pb_settings->set_mip_scaling(settings.mip_scaling); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_status_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::CHECK_STATUS); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_get_result_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::GET_RESULT); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_delete_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::DELETE_RESULT); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_get_logs_request(const std::string& job_id, + int64_t frombyte) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::GET_LOGS); + request.set_job_id(job_id); + request.set_frombyte(frombyte); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_cancel_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::CANCEL_JOB); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + bool deserialize_submit_response(const 
std::vector& data, + std::string& job_id, + std::string& error_message) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + error_message = "Failed to parse submit response"; + return false; + } + + if (!response.has_submit_response()) { + error_message = "Response is not a submit response"; + return false; + } + + const auto& submit = response.submit_response(); + if (submit.status() != cuopt::remote::SUCCESS) { + error_message = submit.message(); + return false; + } + + job_id = submit.job_id(); + return true; + } + + job_status_t deserialize_status_response(const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_status_response()) { + return job_status_t::NOT_FOUND; + } + + switch (response.status_response().job_status()) { + case cuopt::remote::QUEUED: return job_status_t::QUEUED; + case cuopt::remote::PROCESSING: return job_status_t::PROCESSING; + case cuopt::remote::COMPLETED: return job_status_t::COMPLETED; + case cuopt::remote::FAILED: return job_status_t::FAILED; + case cuopt::remote::CANCELLED: return job_status_t::CANCELLED; + case cuopt::remote::NOT_FOUND: + default: return job_status_t::NOT_FOUND; + } + } + + optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + return optimization_problem_solution_t( + cuopt::logic_error("Failed to parse result response", cuopt::error_type_t::RuntimeError)); + } + + if (!response.has_result_response()) { + return optimization_problem_solution_t( + cuopt::logic_error("Response is not a result response", cuopt::error_type_t::RuntimeError)); + } + + const auto& result = response.result_response(); + if (result.status() != cuopt::remote::SUCCESS) { + return optimization_problem_solution_t( + 
cuopt::logic_error(result.error_message(), cuopt::error_type_t::RuntimeError)); + } + + if (!result.has_lp_solution()) { + return optimization_problem_solution_t(cuopt::logic_error( + "Response does not contain LP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_lp_solution(result.lp_solution()); + } + + mip_solution_t deserialize_mip_result_response( + const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + return mip_solution_t( + cuopt::logic_error("Failed to parse result response", cuopt::error_type_t::RuntimeError)); + } + + if (!response.has_result_response()) { + return mip_solution_t( + cuopt::logic_error("Response is not a result response", cuopt::error_type_t::RuntimeError)); + } + + const auto& result = response.result_response(); + if (result.status() != cuopt::remote::SUCCESS) { + return mip_solution_t( + cuopt::logic_error(result.error_message(), cuopt::error_type_t::RuntimeError)); + } + + if (!result.has_mip_solution()) { + return mip_solution_t(cuopt::logic_error("Response does not contain MIP solution", + cuopt::error_type_t::RuntimeError)); + } + + return proto_to_mip_solution(result.mip_solution()); + } + + typename remote_serializer_t::logs_result_t deserialize_logs_response( + const std::vector& data) override + { + typename remote_serializer_t::logs_result_t result; + result.nbytes = 0; + result.job_exists = false; + + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_logs_response()) { + return result; + } + + const auto& logs = response.logs_response(); + result.job_exists = logs.job_exists(); + result.nbytes = logs.nbytes(); + + result.log_lines.reserve(logs.log_lines_size()); + for (int i = 0; i < logs.log_lines_size(); ++i) { + result.log_lines.push_back(logs.log_lines(i)); + } + + return result; + } + + typename remote_serializer_t::cancel_result_t 
deserialize_cancel_response( + const std::vector& data) override + { + typename remote_serializer_t::cancel_result_t result; + result.success = false; + result.message = "Failed to parse response"; + result.job_status = job_status_t::NOT_FOUND; + + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_cancel_response()) { + return result; + } + + const auto& cancel = response.cancel_response(); + result.success = (cancel.status() == cuopt::remote::SUCCESS); + result.message = cancel.message(); + + switch (cancel.job_status()) { + case cuopt::remote::QUEUED: result.job_status = job_status_t::QUEUED; break; + case cuopt::remote::PROCESSING: result.job_status = job_status_t::PROCESSING; break; + case cuopt::remote::COMPLETED: result.job_status = job_status_t::COMPLETED; break; + case cuopt::remote::FAILED: result.job_status = job_status_t::FAILED; break; + case cuopt::remote::CANCELLED: result.job_status = job_status_t::CANCELLED; break; + case cuopt::remote::NOT_FOUND: + default: result.job_status = job_status_t::NOT_FOUND; break; + } + + return result; + } + + //============================================================================ + // Server-side Async Request Handling + //============================================================================ + + bool is_async_request(const std::vector& data) override + { + // An AsyncRequest is characterized by having the request_type field set + // and containing either lp_request or mip_request. + // We can detect it by checking if it parses as AsyncRequest AND has a job_data set. + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return false; } + + // AsyncRequest must have either lp_request or mip_request set + // (the job_data oneof). If neither is set, it's not an async request + // or it's a status/result/delete request that has job_id instead. 
+ bool has_job_data = request.has_lp_request() || request.has_mip_request(); + bool has_job_id = !request.job_id().empty(); + + // It's an async request if it has job_data OR job_id (for non-submit requests) + return has_job_data || has_job_id; + } + + int get_async_request_type(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return -1; } + + switch (request.request_type()) { + case cuopt::remote::SUBMIT_JOB: return 0; + case cuopt::remote::CHECK_STATUS: return 1; + case cuopt::remote::GET_RESULT: return 2; + case cuopt::remote::DELETE_RESULT: return 3; + case cuopt::remote::GET_LOGS: return 4; + case cuopt::remote::CANCEL_JOB: return 5; + case cuopt::remote::WAIT_FOR_RESULT: return 6; + default: return -1; + } + } + + bool is_blocking_request(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return false; } + return request.blocking(); + } + + std::vector extract_problem_data(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return {}; } + + std::string serialized; + if (request.has_lp_request()) { + serialized = request.lp_request().SerializeAsString(); + } else if (request.has_mip_request()) { + serialized = request.mip_request().SerializeAsString(); + } else { + return {}; + } + + return std::vector(serialized.begin(), serialized.end()); + } + + std::string get_job_id(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return ""; } + return request.job_id(); + } + + int64_t get_frombyte(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return 0; } + return request.frombyte(); + } + + std::vector serialize_submit_response(bool success, 
const std::string& result) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::SUBMIT_JOB); + + auto* submit = response.mutable_submit_response(); + if (success) { + submit->set_status(cuopt::remote::SUCCESS); + submit->set_job_id(result); + submit->set_message("Job submitted successfully"); + } else { + submit->set_status(cuopt::remote::ERROR_INTERNAL); + submit->set_message(result); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_status_response(int status_code, + const std::string& message) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::CHECK_STATUS); + + auto* status = response.mutable_status_response(); + + switch (status_code) { + case 0: status->set_job_status(cuopt::remote::QUEUED); break; + case 1: status->set_job_status(cuopt::remote::PROCESSING); break; + case 2: status->set_job_status(cuopt::remote::COMPLETED); break; + case 3: status->set_job_status(cuopt::remote::FAILED); break; + case 4: status->set_job_status(cuopt::remote::NOT_FOUND); break; + case 5: status->set_job_status(cuopt::remote::CANCELLED); break; + default: status->set_job_status(cuopt::remote::NOT_FOUND); break; + } + status->set_message(message); + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message, + bool is_mip = false) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::GET_RESULT); + + auto* result = response.mutable_result_response(); + + if (success) { + result->set_status(cuopt::remote::SUCCESS); + // Parse and embed the solution based on problem type + if (is_mip) { + cuopt::remote::MIPSolution mip_sol; + if 
(mip_sol.ParseFromArray(result_data.data(), result_data.size())) { + result->mutable_mip_solution()->CopyFrom(mip_sol); + } + } else { + cuopt::remote::LPSolution lp_sol; + if (lp_sol.ParseFromArray(result_data.data(), result_data.size())) { + result->mutable_lp_solution()->CopyFrom(lp_sol); + } + } + } else { + result->set_status(cuopt::remote::ERROR_INTERNAL); + result->set_error_message(error_message); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_delete_response(bool success) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::DELETE_RESULT); + + auto* del = response.mutable_delete_response(); + del->set_status(success ? cuopt::remote::SUCCESS : cuopt::remote::ERROR_NOT_FOUND); + del->set_message(success ? "Job deleted" : "Job not found"); + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t nbytes, + bool job_exists) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::GET_LOGS); + + auto* logs = response.mutable_logs_response(); + logs->set_status(job_exists ? 
cuopt::remote::SUCCESS : cuopt::remote::ERROR_NOT_FOUND); + logs->set_job_id(job_id); + logs->set_nbytes(nbytes); + logs->set_job_exists(job_exists); + + for (const auto& line : log_lines) { + logs->add_log_lines(line); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::CANCEL_JOB); + + auto* cancel = response.mutable_cancel_response(); + cancel->set_status(success ? cuopt::remote::SUCCESS : cuopt::remote::ERROR_INTERNAL); + cancel->set_message(message); + + switch (status_code) { + case 0: cancel->set_job_status(cuopt::remote::QUEUED); break; + case 1: cancel->set_job_status(cuopt::remote::PROCESSING); break; + case 2: cancel->set_job_status(cuopt::remote::COMPLETED); break; + case 3: cancel->set_job_status(cuopt::remote::FAILED); break; + case 4: cancel->set_job_status(cuopt::remote::NOT_FOUND); break; + case 5: cancel->set_job_status(cuopt::remote::CANCELLED); break; + default: cancel->set_job_status(cuopt::remote::NOT_FOUND); break; + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + //============================================================================ + // Metadata + //============================================================================ + + std::string format_name() const override { return "protobuf"; } + + uint32_t protocol_version() const override { return 1; } + + private: + //============================================================================ + // Helper Methods - Problem Serialization + //============================================================================ + + void serialize_problem_to_proto(const mps_parser::data_model_view_t& view, + 
cuopt::remote::OptimizationProblem* pb_problem) + { + // Note: view must point to CPU memory for serialization + // The solve functions ensure this by copying GPU data to CPU if needed + + pb_problem->set_problem_name(view.get_problem_name()); + pb_problem->set_objective_name(view.get_objective_name()); + pb_problem->set_maximize(view.get_sense()); // get_sense() returns true for maximize + pb_problem->set_objective_scaling_factor(view.get_objective_scaling_factor()); + pb_problem->set_objective_offset(view.get_objective_offset()); + + // Get spans for constraint matrix (CSR format) + auto values_span = view.get_constraint_matrix_values(); + auto indices_span = view.get_constraint_matrix_indices(); + auto offsets_span = view.get_constraint_matrix_offsets(); + + auto nnz = static_cast(values_span.size()); + auto n_rows = static_cast(offsets_span.size()) - 1; + + const f_t* values_ptr = values_span.data(); + const i_t* indices_ptr = indices_span.data(); + const i_t* offsets_ptr = offsets_span.data(); + + // Constraint matrix A in CSR format (field names match data_model_view_t) + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_a(static_cast(values_ptr[i])); + } + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_a_indices(static_cast(indices_ptr[i])); + } + for (i_t i = 0; i <= n_rows; ++i) { + pb_problem->add_a_offsets(static_cast(offsets_ptr[i])); + } + + // Objective coefficients c (field name matches data_model_view_t: c_) + auto obj_span = view.get_objective_coefficients(); + auto n_cols = static_cast(obj_span.size()); + const f_t* obj_ptr = obj_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + pb_problem->add_c(static_cast(obj_ptr[i])); + } + + // Variable bounds + auto lb_span = view.get_variable_lower_bounds(); + auto ub_span = view.get_variable_upper_bounds(); + const f_t* lb_ptr = lb_span.data(); + const f_t* ub_ptr = ub_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + pb_problem->add_variable_lower_bounds(static_cast(lb_ptr[i])); + 
pb_problem->add_variable_upper_bounds(static_cast(ub_ptr[i])); + } + + // Constraint bounds - need to handle both formats: + // 1. Direct lower/upper bounds (set_constraint_lower/upper_bounds) + // 2. RHS + row_types format (set_constraint_bounds + set_row_types) + auto c_lb_span = view.get_constraint_lower_bounds(); + auto c_ub_span = view.get_constraint_upper_bounds(); + + if (c_lb_span.size() == static_cast(n_rows) && + c_ub_span.size() == static_cast(n_rows)) { + // Direct format - use as-is + const f_t* c_lb_ptr = c_lb_span.data(); + const f_t* c_ub_ptr = c_ub_span.data(); + for (i_t i = 0; i < n_rows; ++i) { + pb_problem->add_constraint_lower_bounds(static_cast(c_lb_ptr[i])); + pb_problem->add_constraint_upper_bounds(static_cast(c_ub_ptr[i])); + } + } else { + // RHS + row_types format - compute lower/upper bounds + auto b_span = view.get_constraint_bounds(); + auto row_types_span = view.get_row_types(); + const f_t* b_ptr = b_span.data(); + const char* rt_ptr = row_types_span.data(); + + constexpr f_t inf = std::numeric_limits::infinity(); + + for (i_t i = 0; i < n_rows; ++i) { + f_t lb, ub; + char row_type = (rt_ptr && row_types_span.size() > 0) ? rt_ptr[i] : 'E'; + f_t rhs = (b_ptr && b_span.size() > 0) ? 
b_ptr[i] : 0; + + switch (row_type) { + case 'E': // Equality: lb = ub = rhs + lb = rhs; + ub = rhs; + break; + case 'L': // Less-than-or-equal: -inf <= Ax <= rhs + lb = -inf; + ub = rhs; + break; + case 'G': // Greater-than-or-equal: rhs <= Ax <= inf + lb = rhs; + ub = inf; + break; + case 'N': // Non-constraining (free) + lb = -inf; + ub = inf; + break; + default: // Default to equality + lb = rhs; + ub = rhs; + break; + } + pb_problem->add_constraint_lower_bounds(static_cast(lb)); + pb_problem->add_constraint_upper_bounds(static_cast(ub)); + } + } + + // Variable names (if available) + const auto& var_names = view.get_variable_names(); + for (const auto& name : var_names) { + pb_problem->add_variable_names(name); + } + + // Row names (if available) + const auto& row_names = view.get_row_names(); + for (const auto& name : row_names) { + pb_problem->add_row_names(name); + } + + // Variable types (for MIP problems) - stored as bytes to match data_model_view_t + auto var_types_span = view.get_variable_types(); + if (var_types_span.size() > 0) { + pb_problem->set_variable_types(std::string(var_types_span.data(), var_types_span.size())); + } + + // Row types - store directly as bytes + auto row_types_span = view.get_row_types(); + if (row_types_span.size() > 0) { + pb_problem->set_row_types(std::string(row_types_span.data(), row_types_span.size())); + } + + // Constraint bounds b (RHS) - store directly if available + auto b_span = view.get_constraint_bounds(); + if (b_span.size() > 0) { + const f_t* b_ptr = b_span.data(); + for (size_t i = 0; i < b_span.size(); ++i) { + pb_problem->add_b(static_cast(b_ptr[i])); + } + } + + // Initial solutions (if available) + auto init_primal_span = view.get_initial_primal_solution(); + if (init_primal_span.size() > 0) { + const f_t* init_primal_ptr = init_primal_span.data(); + for (size_t i = 0; i < init_primal_span.size(); ++i) { + pb_problem->add_initial_primal_solution(static_cast(init_primal_ptr[i])); + } + } + + auto 
init_dual_span = view.get_initial_dual_solution(); + if (init_dual_span.size() > 0) { + const f_t* init_dual_ptr = init_dual_span.data(); + for (size_t i = 0; i < init_dual_span.size(); ++i) { + pb_problem->add_initial_dual_solution(static_cast(init_dual_ptr[i])); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (view.has_quadratic_objective()) { + auto q_values_span = view.get_quadratic_objective_values(); + auto q_indices_span = view.get_quadratic_objective_indices(); + auto q_offsets_span = view.get_quadratic_objective_offsets(); + + const f_t* q_values_ptr = q_values_span.data(); + const i_t* q_indices_ptr = q_indices_span.data(); + const i_t* q_offsets_ptr = q_offsets_span.data(); + + for (size_t i = 0; i < q_values_span.size(); ++i) { + pb_problem->add_q_values(static_cast(q_values_ptr[i])); + } + for (size_t i = 0; i < q_indices_span.size(); ++i) { + pb_problem->add_q_indices(static_cast(q_indices_ptr[i])); + } + for (size_t i = 0; i < q_offsets_span.size(); ++i) { + pb_problem->add_q_offsets(static_cast(q_offsets_ptr[i])); + } + } + } + + // Convert cuOpt pdlp_solver_mode_t to protobuf enum + cuopt::remote::PDLPSolverMode to_proto_pdlp_mode(pdlp_solver_mode_t mode) + { + switch (mode) { + case pdlp_solver_mode_t::Stable1: return cuopt::remote::Stable1; + case pdlp_solver_mode_t::Stable2: return cuopt::remote::Stable2; + case pdlp_solver_mode_t::Methodical1: return cuopt::remote::Methodical1; + case pdlp_solver_mode_t::Fast1: return cuopt::remote::Fast1; + case pdlp_solver_mode_t::Stable3: return cuopt::remote::Stable3; + default: return cuopt::remote::Stable3; + } + } + + // Convert cuOpt method_t to protobuf enum + cuopt::remote::LPMethod to_proto_method(method_t method) + { + switch (method) { + case method_t::Concurrent: return cuopt::remote::Concurrent; + case method_t::PDLP: return cuopt::remote::PDLP; + case method_t::DualSimplex: return cuopt::remote::DualSimplex; + case method_t::Barrier: return cuopt::remote::Barrier; + default: 
return cuopt::remote::Concurrent; + } + } + + void serialize_lp_settings_to_proto(const pdlp_solver_settings_t& settings, + cuopt::remote::PDLPSolverSettings* pb_settings) + { + // Termination tolerances (all names match cuOpt API) + pb_settings->set_absolute_gap_tolerance(settings.tolerances.absolute_gap_tolerance); + pb_settings->set_relative_gap_tolerance(settings.tolerances.relative_gap_tolerance); + pb_settings->set_primal_infeasible_tolerance(settings.tolerances.primal_infeasible_tolerance); + pb_settings->set_dual_infeasible_tolerance(settings.tolerances.dual_infeasible_tolerance); + pb_settings->set_absolute_dual_tolerance(settings.tolerances.absolute_dual_tolerance); + pb_settings->set_relative_dual_tolerance(settings.tolerances.relative_dual_tolerance); + pb_settings->set_absolute_primal_tolerance(settings.tolerances.absolute_primal_tolerance); + pb_settings->set_relative_primal_tolerance(settings.tolerances.relative_primal_tolerance); + + // Limits + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_iteration_limit(static_cast(settings.iteration_limit)); + + // Solver configuration + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_detect_infeasibility(settings.detect_infeasibility); + pb_settings->set_strict_infeasibility(settings.strict_infeasibility); + pb_settings->set_pdlp_solver_mode(to_proto_pdlp_mode(settings.pdlp_solver_mode)); + pb_settings->set_method(to_proto_method(settings.method)); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_dual_postsolve(settings.dual_postsolve); + pb_settings->set_crossover(settings.crossover); + pb_settings->set_num_gpus(settings.num_gpus); + + // Advanced options + pb_settings->set_per_constraint_residual(settings.per_constraint_residual); + pb_settings->set_cudss_deterministic(settings.cudss_deterministic); + pb_settings->set_folding(settings.folding); + pb_settings->set_augmented(settings.augmented); + pb_settings->set_dualize(settings.dualize); + 
pb_settings->set_ordering(settings.ordering); + pb_settings->set_barrier_dual_initial_point(settings.barrier_dual_initial_point); + pb_settings->set_eliminate_dense_columns(settings.eliminate_dense_columns); + pb_settings->set_save_best_primal_so_far(settings.save_best_primal_so_far); + pb_settings->set_first_primal_feasible(settings.first_primal_feasible); + } + + //============================================================================ + // Helper Methods - Problem Deserialization + //============================================================================ + + void proto_to_mps_data(const cuopt::remote::OptimizationProblem& pb_problem, + cuopt::mps_parser::mps_data_model_t& mps_data) + { + mps_data.set_problem_name(pb_problem.problem_name()); + mps_data.set_objective_name(pb_problem.objective_name()); + mps_data.set_maximize(pb_problem.maximize()); + mps_data.set_objective_scaling_factor(pb_problem.objective_scaling_factor()); + mps_data.set_objective_offset(pb_problem.objective_offset()); + + // Constraint matrix A in CSR format (field names match data_model_view_t) + std::vector values(pb_problem.a().begin(), pb_problem.a().end()); + std::vector indices(pb_problem.a_indices().begin(), pb_problem.a_indices().end()); + std::vector offsets(pb_problem.a_offsets().begin(), pb_problem.a_offsets().end()); + + mps_data.set_csr_constraint_matrix(values.data(), + static_cast(values.size()), + indices.data(), + static_cast(indices.size()), + offsets.data(), + static_cast(offsets.size())); + + // Objective coefficients c + std::vector obj(pb_problem.c().begin(), pb_problem.c().end()); + mps_data.set_objective_coefficients(obj.data(), static_cast(obj.size())); + + // Variable bounds + std::vector var_lb(pb_problem.variable_lower_bounds().begin(), + pb_problem.variable_lower_bounds().end()); + std::vector var_ub(pb_problem.variable_upper_bounds().begin(), + pb_problem.variable_upper_bounds().end()); + mps_data.set_variable_lower_bounds(var_lb.data(), 
static_cast(var_lb.size())); + mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); + + // Constraint bounds (prefer lower/upper bounds if available) + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector con_lb(pb_problem.constraint_lower_bounds().begin(), + pb_problem.constraint_lower_bounds().end()); + std::vector con_ub(pb_problem.constraint_upper_bounds().begin(), + pb_problem.constraint_upper_bounds().end()); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + } else if (pb_problem.b_size() > 0) { + // Use b (RHS) + row_types format + std::vector b(pb_problem.b().begin(), pb_problem.b().end()); + mps_data.set_constraint_bounds(b.data(), static_cast(b.size())); + + if (!pb_problem.row_types().empty()) { + const std::string& row_types = pb_problem.row_types(); + mps_data.set_row_types(row_types.data(), static_cast(row_types.size())); + } + } + + // Variable names + if (pb_problem.variable_names_size() > 0) { + std::vector var_names(pb_problem.variable_names().begin(), + pb_problem.variable_names().end()); + mps_data.set_variable_names(var_names); + } + + // Row names + if (pb_problem.row_names_size() > 0) { + std::vector row_names(pb_problem.row_names().begin(), + pb_problem.row_names().end()); + mps_data.set_row_names(row_names); + } + + // Variable types (stored as bytes, matching data_model_view_t) + if (!pb_problem.variable_types().empty()) { + const std::string& var_types_str = pb_problem.variable_types(); + std::vector var_types(var_types_str.begin(), var_types_str.end()); + mps_data.set_variable_types(var_types); + } + + // Initial solutions (if provided) + if (pb_problem.initial_primal_solution_size() > 0) { + std::vector init_primal(pb_problem.initial_primal_solution().begin(), + pb_problem.initial_primal_solution().end()); + mps_data.set_initial_primal_solution(init_primal.data(), + 
static_cast(init_primal.size())); + } + + if (pb_problem.initial_dual_solution_size() > 0) { + std::vector init_dual(pb_problem.initial_dual_solution().begin(), + pb_problem.initial_dual_solution().end()); + mps_data.set_initial_dual_solution(init_dual.data(), static_cast(init_dual.size())); + } + + // Quadratic objective matrix Q (for QPS problems) + if (pb_problem.q_values_size() > 0) { + std::vector q_values(pb_problem.q_values().begin(), pb_problem.q_values().end()); + std::vector q_indices(pb_problem.q_indices().begin(), pb_problem.q_indices().end()); + std::vector q_offsets(pb_problem.q_offsets().begin(), pb_problem.q_offsets().end()); + + mps_data.set_quadratic_objective_matrix(q_values.data(), + static_cast(q_values.size()), + q_indices.data(), + static_cast(q_indices.size()), + q_offsets.data(), + static_cast(q_offsets.size())); + } + } + + // Convert protobuf enum to cuOpt pdlp_solver_mode_t + pdlp_solver_mode_t from_proto_pdlp_mode(cuopt::remote::PDLPSolverMode mode) + { + switch (mode) { + case cuopt::remote::Stable1: return pdlp_solver_mode_t::Stable1; + case cuopt::remote::Stable2: return pdlp_solver_mode_t::Stable2; + case cuopt::remote::Methodical1: return pdlp_solver_mode_t::Methodical1; + case cuopt::remote::Fast1: return pdlp_solver_mode_t::Fast1; + case cuopt::remote::Stable3: return pdlp_solver_mode_t::Stable3; + default: return pdlp_solver_mode_t::Stable3; + } + } + + // Convert protobuf enum to cuOpt method_t + method_t from_proto_method(cuopt::remote::LPMethod method) + { + switch (method) { + case cuopt::remote::Concurrent: return method_t::Concurrent; + case cuopt::remote::PDLP: return method_t::PDLP; + case cuopt::remote::DualSimplex: return method_t::DualSimplex; + case cuopt::remote::Barrier: return method_t::Barrier; + default: return method_t::Concurrent; + } + } + + void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, + pdlp_solver_settings_t& settings) + { + // Termination tolerances (all names match cuOpt 
API) + settings.tolerances.absolute_gap_tolerance = pb_settings.absolute_gap_tolerance(); + settings.tolerances.relative_gap_tolerance = pb_settings.relative_gap_tolerance(); + settings.tolerances.primal_infeasible_tolerance = pb_settings.primal_infeasible_tolerance(); + settings.tolerances.dual_infeasible_tolerance = pb_settings.dual_infeasible_tolerance(); + settings.tolerances.absolute_dual_tolerance = pb_settings.absolute_dual_tolerance(); + settings.tolerances.relative_dual_tolerance = pb_settings.relative_dual_tolerance(); + settings.tolerances.absolute_primal_tolerance = pb_settings.absolute_primal_tolerance(); + settings.tolerances.relative_primal_tolerance = pb_settings.relative_primal_tolerance(); + + // Limits + settings.time_limit = pb_settings.time_limit(); + settings.iteration_limit = static_cast(pb_settings.iteration_limit()); + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.detect_infeasibility = pb_settings.detect_infeasibility(); + settings.strict_infeasibility = pb_settings.strict_infeasibility(); + settings.pdlp_solver_mode = from_proto_pdlp_mode(pb_settings.pdlp_solver_mode()); + settings.method = from_proto_method(pb_settings.method()); + settings.presolve = pb_settings.presolve(); + settings.dual_postsolve = pb_settings.dual_postsolve(); + settings.crossover = pb_settings.crossover(); + settings.num_gpus = pb_settings.num_gpus(); + + // Advanced options + settings.per_constraint_residual = pb_settings.per_constraint_residual(); + settings.cudss_deterministic = pb_settings.cudss_deterministic(); + settings.folding = pb_settings.folding(); + settings.augmented = pb_settings.augmented(); + settings.dualize = pb_settings.dualize(); + settings.ordering = pb_settings.ordering(); + settings.barrier_dual_initial_point = pb_settings.barrier_dual_initial_point(); + settings.eliminate_dense_columns = pb_settings.eliminate_dense_columns(); + settings.save_best_primal_so_far = 
pb_settings.save_best_primal_so_far(); + settings.first_primal_feasible = pb_settings.first_primal_feasible(); + } + + void proto_to_mip_settings(const cuopt::remote::MIPSolverSettings& pb_settings, + mip_solver_settings_t& settings) + { + // Limits + settings.time_limit = pb_settings.time_limit(); + + // Tolerances (all names match cuOpt API) + settings.tolerances.relative_mip_gap = pb_settings.relative_mip_gap(); + settings.tolerances.absolute_mip_gap = pb_settings.absolute_mip_gap(); + settings.tolerances.integrality_tolerance = pb_settings.integrality_tolerance(); + settings.tolerances.absolute_tolerance = pb_settings.absolute_tolerance(); + settings.tolerances.relative_tolerance = pb_settings.relative_tolerance(); + settings.tolerances.presolve_absolute_tolerance = pb_settings.presolve_absolute_tolerance(); + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.heuristics_only = pb_settings.heuristics_only(); + settings.num_cpu_threads = pb_settings.num_cpu_threads(); + settings.num_gpus = pb_settings.num_gpus(); + settings.presolve = pb_settings.presolve(); + settings.mip_scaling = pb_settings.mip_scaling(); + } + + //============================================================================ + // Helper Methods - Solution Conversion + //============================================================================ + + optimization_problem_solution_t proto_to_lp_solution( + const cuopt::remote::LPSolution& pb_solution) + { + // Create CPU-based solution + std::vector primal(pb_solution.primal_solution().begin(), + pb_solution.primal_solution().end()); + std::vector dual(pb_solution.dual_solution().begin(), pb_solution.dual_solution().end()); + std::vector reduced_cost(pb_solution.reduced_cost().begin(), + pb_solution.reduced_cost().end()); + + optimization_problem_solution_t solution( + from_proto_status(pb_solution.termination_status())); + + // Set solution data + solution.set_primal_solution_host(std::move(primal)); 
+ solution.set_dual_solution_host(std::move(dual)); + solution.set_reduced_cost_host(std::move(reduced_cost)); + + // Set statistics + solution.set_l2_primal_residual(pb_solution.l2_primal_residual()); + solution.set_l2_dual_residual(pb_solution.l2_dual_residual()); + solution.set_primal_objective(pb_solution.primal_objective()); + solution.set_dual_objective(pb_solution.dual_objective()); + solution.set_gap(pb_solution.gap()); + solution.set_nb_iterations(pb_solution.nb_iterations()); + solution.set_solve_time(pb_solution.solve_time()); + solution.set_solved_by_pdlp(pb_solution.solved_by_pdlp()); + + return solution; + } + + void lp_solution_to_proto(const optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Solution vectors - handle both device and host memory + if (solution.is_device_memory()) { + // Copy from device to host + const auto& d_primal = solution.get_primal_solution(); + const auto& d_dual = solution.get_dual_solution(); + // Note: reduced_cost getter is non-const, so we need to work around this + + // Copy primal solution from device + if (d_primal.size() > 0) { + std::vector h_primal(d_primal.size()); + cudaMemcpy( + h_primal.data(), d_primal.data(), d_primal.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + } + + // Copy dual solution from device + if (d_dual.size() > 0) { + std::vector h_dual(d_dual.size()); + cudaMemcpy( + h_dual.data(), d_dual.data(), d_dual.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + } + + // For reduced cost, we can access via const cast since we're just reading + auto& nc_solution = const_cast&>(solution); + auto& d_reduced_cost = 
nc_solution.get_reduced_cost(); + if (d_reduced_cost.size() > 0) { + std::vector h_reduced_cost(d_reduced_cost.size()); + cudaMemcpy(h_reduced_cost.data(), + d_reduced_cost.data(), + d_reduced_cost.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + for (const auto& v : h_reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + } else { + // Data is already on host + const auto& primal = solution.get_primal_solution_host(); + const auto& dual = solution.get_dual_solution_host(); + const auto& reduced_cost = solution.get_reduced_cost_host(); + + for (const auto& v : primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + for (const auto& v : dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + for (const auto& v : reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + + // Statistics + pb_solution->set_l2_primal_residual(solution.get_l2_primal_residual()); + pb_solution->set_l2_dual_residual(solution.get_l2_dual_residual()); + pb_solution->set_primal_objective(solution.get_primal_objective()); + pb_solution->set_dual_objective(solution.get_dual_objective()); + pb_solution->set_gap(solution.get_gap()); + pb_solution->set_nb_iterations(solution.get_nb_iterations()); + pb_solution->set_solve_time(solution.get_solve_time()); + pb_solution->set_solved_by_pdlp(solution.get_solved_by_pdlp()); + } + + mip_solution_t proto_to_mip_solution(const cuopt::remote::MIPSolution& pb_solution) + { + std::vector solution_vec(pb_solution.solution().begin(), pb_solution.solution().end()); + + // Create stats from protobuf data + solver_stats_t stats; + stats.total_solve_time = pb_solution.total_solve_time(); + stats.presolve_time = pb_solution.presolve_time(); + stats.solution_bound = pb_solution.solution_bound(); + stats.num_nodes = pb_solution.nodes(); + stats.num_simplex_iterations = pb_solution.simplex_iterations(); + + mip_solution_t solution(from_proto_mip_status(pb_solution.termination_status()), + stats); + + 
solution.set_solution_host(std::move(solution_vec)); + solution.set_objective(pb_solution.objective()); + solution.set_mip_gap(pb_solution.mip_gap()); + solution.set_max_constraint_violation(pb_solution.max_constraint_violation()); + solution.set_max_int_violation(pb_solution.max_int_violation()); + solution.set_max_variable_bound_violation(pb_solution.max_variable_bound_violation()); + + return solution; + } + + void mip_solution_to_proto(const mip_solution_t& solution, + cuopt::remote::MIPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_mip_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Handle both device and host memory + if (solution.is_device_memory()) { + const auto& d_sol = solution.get_solution(); + if (d_sol.size() > 0) { + std::vector h_sol(d_sol.size()); + cudaMemcpy(h_sol.data(), d_sol.data(), d_sol.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_sol) { + pb_solution->add_solution(static_cast(v)); + } + } + } else { + const auto& sol_vec = solution.get_solution_host(); + for (const auto& v : sol_vec) { + pb_solution->add_solution(static_cast(v)); + } + } + + pb_solution->set_objective(solution.get_objective_value()); + pb_solution->set_mip_gap(solution.get_mip_gap()); + pb_solution->set_solution_bound(solution.get_solution_bound()); + pb_solution->set_total_solve_time(solution.get_total_solve_time()); + pb_solution->set_presolve_time(solution.get_presolve_time()); + pb_solution->set_max_constraint_violation(solution.get_max_constraint_violation()); + pb_solution->set_max_int_violation(solution.get_max_int_violation()); + pb_solution->set_max_variable_bound_violation(solution.get_max_variable_bound_violation()); + pb_solution->set_nodes(solution.get_num_nodes()); + pb_solution->set_simplex_iterations(solution.get_num_simplex_iterations()); + } +}; + +//============================================================================ +// Template 
Instantiations +// Note: Only int32_t and double types are instantiated to avoid adding +// int64_t instantiations throughout the codebase +//============================================================================ + +#if CUOPT_INSTANTIATE_FLOAT +template class protobuf_serializer_t; +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template class protobuf_serializer_t; +#endif + +//============================================================================ +// Factory Functions +//============================================================================ + +template +std::shared_ptr> get_default_serializer() +{ + static auto instance = std::make_shared>(); + return instance; +} + +// Explicit instantiations for factory functions +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_default_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_default_serializer(); +#endif + +// Custom serializer loader (lazy-initialized) +template +std::shared_ptr> get_serializer() +{ + static std::shared_ptr> instance; + static std::once_flag init_flag; + + std::call_once(init_flag, []() { + const char* custom_lib = std::getenv("CUOPT_SERIALIZER_LIB"); + + if (custom_lib && custom_lib[0] != '\0') { + // Try to load custom serializer + CUOPT_LOG_INFO("[remote_solve] Loading custom serializer from: {}", custom_lib); + + // Open the shared library + void* handle = dlopen(custom_lib, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + CUOPT_LOG_ERROR("[remote_solve] Failed to load serializer library: {}", dlerror()); + instance = get_default_serializer(); + return; + } + + // Look for the factory function + // The function name includes template types for proper linking + std::string factory_name = "create_cuopt_serializer"; + if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f32"; + } else if constexpr 
(std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f32"; + } + + using factory_fn_t = std::unique_ptr> (*)(); + auto factory = reinterpret_cast(dlsym(handle, factory_name.c_str())); + + if (!factory) { + CUOPT_LOG_ERROR( + "[remote_solve] Factory function '{}' not found: {}", factory_name, dlerror()); + dlclose(handle); + instance = get_default_serializer(); + return; + } + + auto custom_serializer = factory(); + if (custom_serializer) { + CUOPT_LOG_INFO("[remote_solve] Using custom serializer: {}", + custom_serializer->format_name()); + instance = std::move(custom_serializer); + } else { + CUOPT_LOG_ERROR("[remote_solve] Factory returned null, using default"); + dlclose(handle); + instance = get_default_serializer(); + } + // Note: We intentionally don't dlclose(handle) here to keep the library loaded + } else { + instance = get_default_serializer(); + } + }); + + return instance; +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_serializer(); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu new file mode 100644 index 000000000..49c8e600f --- /dev/null +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -0,0 +1,710 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// Message types for streaming protocol (must match server) +enum class MessageType : uint8_t { + LOG_MESSAGE = 0, // Log output from server + SOLUTION = 1, // Final solution data +}; + +// Check if sync mode is enabled (default is async) +static bool use_sync_mode() +{ + const char* sync_env = std::getenv("CUOPT_REMOTE_USE_SYNC"); + return (sync_env != nullptr && std::string(sync_env) == "1"); +} + +/** + * @brief Simple socket client for remote solve with streaming support + */ +class remote_client_t { + public: + remote_client_t(const std::string& host, int port) : host_(host), port_(port), sockfd_(-1) {} + + ~remote_client_t() { disconnect(); } + + bool connect() + { + if (sockfd_ >= 0) return true; + + sockfd_ = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd_ < 0) { + CUOPT_LOG_ERROR("[remote_solve] Failed to create socket"); + return false; + } + + struct hostent* server = gethostbyname(host_.c_str()); + if (server == nullptr) { + CUOPT_LOG_ERROR("[remote_solve] Unknown host: {}", host_); + close(sockfd_); + sockfd_ = -1; + return false; + } + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length); + addr.sin_port = htons(port_); + + if (::connect(sockfd_, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + CUOPT_LOG_ERROR("[remote_solve] Failed to connect to {}:{}", host_, port_); + close(sockfd_); + sockfd_ = -1; + return false; + } + + return true; + } + + void disconnect() + { + if (sockfd_ >= 0) { + close(sockfd_); + sockfd_ = -1; + } + } + + bool send_request(const std::vector& data) + { + if (sockfd_ < 0) return false; + + // Send size first (8 bytes for large problem support) + uint64_t size = static_cast(data.size()); + if 
(!write_all(&size, sizeof(size))) return false; + if (!write_all(data.data(), data.size())) return false; + return true; + } + + /** + * @brief Receive response with streaming log support. + * + * This method reads messages from the server. If the server sends LOG_MESSAGE + * types, they are printed to the console. When a SOLUTION message is received, + * the solution data is returned. + * + * @param data Output buffer for solution data + * @param log_to_console If true, print received log messages + * @return true if solution was received, false on error + */ + bool receive_streaming_response(std::vector& data, bool log_to_console) + { + if (sockfd_ < 0) return false; + + while (true) { + // Read message type (1 byte) + uint8_t msg_type; + if (!read_all(&msg_type, 1)) { + // If we can't read the message type, try legacy format + // (server might not support streaming) + return receive_response_legacy(data); + } + + // Read payload size (8 bytes for large problem support) + uint64_t payload_size; + if (!read_all(&payload_size, sizeof(payload_size))) return false; + + // Sanity check - reject messages larger than 16GB + if (payload_size > 16ULL * 1024 * 1024 * 1024) { + CUOPT_LOG_ERROR("[remote_solve] Message too large: {} bytes", payload_size); + return false; + } + + if (static_cast(msg_type) == MessageType::LOG_MESSAGE) { + // Read and display log message + if (payload_size > 0) { + std::vector log_msg(payload_size + 1); + if (!read_all(log_msg.data(), payload_size)) return false; + log_msg[payload_size] = '\0'; + + if (log_to_console) { + // Print log message from server (already formatted) + std::cout << log_msg.data() << std::flush; + } + } + } else if (static_cast(msg_type) == MessageType::SOLUTION) { + // Read solution data + data.resize(payload_size); + if (payload_size > 0) { + if (!read_all(data.data(), payload_size)) return false; + } + return true; + } else { + CUOPT_LOG_WARN("[remote_solve] Unknown message type: {}", static_cast(msg_type)); + // Skip 
unknown message + if (payload_size > 0) { + std::vector skip_buf(payload_size); + if (!read_all(skip_buf.data(), payload_size)) return false; + } + } + } + } + + // Legacy response format (non-streaming) + bool receive_response(std::vector& data) { return receive_response_legacy(data); } + + private: + bool receive_response_legacy(std::vector& data) + { + if (sockfd_ < 0) return false; + + // Read size first (8 bytes for large problem support) + uint64_t size; + if (!read_all(&size, sizeof(size))) return false; + + // Sanity check - reject responses larger than 16GB + if (size > 16ULL * 1024 * 1024 * 1024) { + CUOPT_LOG_ERROR("[remote_solve] Response too large: {} bytes", size); + return false; + } + + data.resize(size); + if (!read_all(data.data(), size)) return false; + return true; + } + + bool write_all(const void* buf, size_t len) + { + const uint8_t* ptr = static_cast(buf); + size_t remaining = len; + while (remaining > 0) { + ssize_t n = ::write(sockfd_, ptr, remaining); + if (n <= 0) { + CUOPT_LOG_ERROR("[remote_solve] Write failed"); + return false; + } + ptr += n; + remaining -= n; + } + return true; + } + + bool read_all(void* buf, size_t len) + { + uint8_t* ptr = static_cast(buf); + size_t remaining = len; + while (remaining > 0) { + ssize_t n = ::read(sockfd_, ptr, remaining); + if (n <= 0) { + CUOPT_LOG_ERROR("[remote_solve] Read failed"); + return false; + } + ptr += n; + remaining -= n; + } + return true; + } + + std::string host_; + int port_; + int sockfd_; +}; + +//============================================================================ +// Async Mode Helpers +//============================================================================ + +template +static std::pair submit_job(const std::string& host, + int port, + const std::vector& request_data) +{ + remote_client_t client(host, port); + if (!client.connect()) { return {false, "Failed to connect to server"}; } + + if (!client.send_request(request_data)) { return {false, "Failed to send 
request"}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, "Failed to receive response"}; } + + auto serializer = get_serializer(); + std::string job_id; + std::string error_message; + if (!serializer->deserialize_submit_response(response_data, job_id, error_message)) { + return {false, error_message}; + } + + return {true, job_id}; +} + +/** + * @brief Retrieve and display buffered logs from the server. + * + * @param host Server host + * @param port Server port + * @param job_id Job ID + * @param frombyte Byte offset to start reading from + * @return std::pair - (job_exists, new_frombyte) + */ +template +static std::pair get_logs(const std::string& host, + int port, + const std::string& job_id, + int64_t frombyte) +{ + remote_client_t client(host, port); + if (!client.connect()) { return {false, frombyte}; } + + auto serializer = get_serializer(); + auto logs_request = serializer->serialize_get_logs_request(job_id, frombyte); + + if (!client.send_request(logs_request)) { return {false, frombyte}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, frombyte}; } + + auto result = serializer->deserialize_logs_response(response_data); + + // Print any new log lines + for (const auto& line : result.log_lines) { + std::cout << line << "\n"; + } + if (!result.log_lines.empty()) { std::cout.flush(); } + + return {result.job_exists, result.nbytes}; +} + +template +static bool poll_until_complete(const std::string& host, + int port, + const std::string& job_id, + bool verbose) +{ + auto serializer = get_serializer(); + using job_status_t = typename remote_serializer_t::job_status_t; + + int64_t log_frombyte = 0; // Track position in log file + + while (true) { + // Fetch and display any new log entries + if (verbose) { + auto [job_exists, new_frombyte] = get_logs(host, port, job_id, log_frombyte); + if (job_exists) { log_frombyte = new_frombyte; } + } + + remote_client_t 
client(host, port); + if (!client.connect()) { + CUOPT_LOG_ERROR("[remote_solve] Failed to connect for status check"); + return false; + } + + auto status_request = serializer->serialize_status_request(job_id); + if (!client.send_request(status_request)) { + CUOPT_LOG_ERROR("[remote_solve] Failed to send status request"); + return false; + } + + std::vector response_data; + if (!client.receive_response(response_data)) { + CUOPT_LOG_ERROR("[remote_solve] Failed to receive status response"); + return false; + } + + auto status = serializer->deserialize_status_response(response_data); + + if (status == job_status_t::COMPLETED) { + // Fetch any remaining log entries + if (verbose) { + get_logs(host, port, job_id, log_frombyte); + CUOPT_LOG_INFO("[remote_solve] Job {} completed", job_id); + } + return true; + } else if (status == job_status_t::FAILED) { + // Fetch any remaining log entries (may contain error info) + if (verbose) { get_logs(host, port, job_id, log_frombyte); } + CUOPT_LOG_ERROR("[remote_solve] Job {} failed", job_id); + return false; + } else if (status == job_status_t::NOT_FOUND) { + CUOPT_LOG_ERROR("[remote_solve] Job {} not found", job_id); + return false; + } + + // Job still queued or processing, wait and try again + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} + +template +static std::pair> get_result(const std::string& host, + int port, + const std::string& job_id) +{ + remote_client_t client(host, port); + if (!client.connect()) { return {false, {}}; } + + auto serializer = get_serializer(); + auto result_request = serializer->serialize_get_result_request(job_id); + + if (!client.send_request(result_request)) { return {false, {}}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, {}}; } + + return {true, response_data}; +} + +template +static void delete_job(const std::string& host, int port, const std::string& job_id) +{ + remote_client_t client(host, port); + if 
(!client.connect()) { return; } + + auto serializer = get_serializer(); + auto delete_request = serializer->serialize_delete_request(job_id); + + if (!client.send_request(delete_request)) { return; } + + std::vector response_data; + client.receive_response(response_data); // Ignore result +} + +template +static cancel_job_result_t cancel_job_impl(const std::string& host, + int port, + const std::string& job_id) +{ + cancel_job_result_t result; + result.success = false; + result.message = "Unknown error"; + result.job_status = remote_job_status_t::NOT_FOUND; + + remote_client_t client(host, port); + if (!client.connect()) { + result.message = "Failed to connect to server"; + return result; + } + + auto serializer = get_serializer(); + auto cancel_request = serializer->serialize_cancel_request(job_id); + + if (!client.send_request(cancel_request)) { + result.message = "Failed to send cancel request"; + return result; + } + + std::vector response_data; + if (!client.receive_response(response_data)) { + result.message = "Failed to receive response"; + return result; + } + + // Deserialize the cancel response + auto cancel_result = serializer->deserialize_cancel_response(response_data); + + result.success = cancel_result.success; + result.message = cancel_result.message; + + // Map serializer job_status_t to remote_job_status_t + using serializer_status = typename remote_serializer_t::job_status_t; + switch (cancel_result.job_status) { + case serializer_status::QUEUED: result.job_status = remote_job_status_t::QUEUED; break; + case serializer_status::PROCESSING: result.job_status = remote_job_status_t::PROCESSING; break; + case serializer_status::COMPLETED: result.job_status = remote_job_status_t::COMPLETED; break; + case serializer_status::FAILED: result.job_status = remote_job_status_t::FAILED; break; + case serializer_status::NOT_FOUND: result.job_status = remote_job_status_t::NOT_FOUND; break; + case serializer_status::CANCELLED: result.job_status = 
remote_job_status_t::CANCELLED; break; + } + + return result; +} + +} // namespace + +//============================================================================ +// LP Remote Solve +//============================================================================ + +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) +{ + const bool sync_mode = use_sync_mode(); + + CUOPT_LOG_INFO("[remote_solve] Solving LP remotely on {}:{} ({} mode)", + config.host, + config.port, + sync_mode ? "sync" : "async"); + + // Log problem info (similar to local solve) + if (settings.log_to_console) { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO("Solving a problem with %d constraints, %d variables, and %d nonzeros (remote)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + + if (sync_mode) { + //========================================================================= + // SYNC/BLOCKING MODE: Unified architecture + // + // Server-side: Job goes through queue, handled by worker process. + // Client blocks until completion (server uses condition variable). + // This enables cancellation for "sync" jobs and concurrent solves. 
+ //========================================================================= + + // Serialize as async request with blocking=true + std::vector request_data = + serializer->serialize_async_lp_request(view, settings, true /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (blocking): {} bytes", + request_data.size()); + + // Connect and send + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); + } + + if (!client.send_request(request_data)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to send request to remote server", cuopt::error_type_t::RuntimeError)); + } + + // Receive response (server blocks until job completes, then returns result) + std::vector response_data; + if (!client.receive_response(response_data)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); + } + + CUOPT_LOG_DEBUG("[remote_solve] Received LP result (blocking): {} bytes", response_data.size()); + + // Deserialize solution from result response (same format as async GET_RESULT) + return serializer->deserialize_lp_result_response(response_data); + + } else { + //========================================================================= + // ASYNC MODE: Submit → Poll → Get Result → Delete + //========================================================================= + + // Serialize as async request with blocking=false + std::vector request_data = + serializer->serialize_async_lp_request(view, settings, false /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (async): {} bytes", request_data.size()); + + // Submit job + auto [submit_ok, job_id_or_error] = + submit_job(config.host, config.port, request_data); + if (!submit_ok) { + return 
optimization_problem_solution_t(cuopt::logic_error( + "Job submission failed: " + job_id_or_error, cuopt::error_type_t::RuntimeError)); + } + std::string job_id = job_id_or_error; + CUOPT_LOG_INFO("[remote_solve] Job submitted, ID: {}", job_id); + + // Poll until complete + if (!poll_until_complete(config.host, config.port, job_id, settings.log_to_console)) { + delete_job(config.host, config.port, job_id); + return optimization_problem_solution_t( + cuopt::logic_error("Job failed or not found", cuopt::error_type_t::RuntimeError)); + } + + // Get result + auto [result_ok, result_data] = get_result(config.host, config.port, job_id); + if (!result_ok) { + delete_job(config.host, config.port, job_id); + return optimization_problem_solution_t( + cuopt::logic_error("Failed to retrieve result", cuopt::error_type_t::RuntimeError)); + } + + // Delete job from server + delete_job(config.host, config.port, job_id); + CUOPT_LOG_DEBUG("[remote_solve] Job {} deleted from server", job_id); + + // Deserialize solution from async result response + return serializer->deserialize_lp_result_response(result_data); + } +} + +//============================================================================ +// MIP Remote Solve +//============================================================================ + +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) +{ + const bool sync_mode = use_sync_mode(); + + CUOPT_LOG_INFO("[remote_solve] Solving MIP remotely on {}:{} ({} mode)", + config.host, + config.port, + sync_mode ? "sync" : "async"); + + // Log problem info + { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? 
static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO( + "Solving a problem with %d constraints, %d variables, and %d nonzeros (remote MIP)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + + if (sync_mode) { + //========================================================================= + // SYNC/BLOCKING MODE: Unified architecture + // + // Server-side: Job goes through queue, handled by worker process. + // Client blocks until completion (server uses condition variable). + // This enables cancellation for "sync" jobs and concurrent solves. + //========================================================================= + + std::vector request_data = + serializer->serialize_async_mip_request(view, settings, true /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (blocking): {} bytes", + request_data.size()); + + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return mip_solution_t(cuopt::logic_error("Failed to connect to remote server", + cuopt::error_type_t::RuntimeError)); + } + + if (!client.send_request(request_data)) { + return mip_solution_t(cuopt::logic_error("Failed to send request to remote server", + cuopt::error_type_t::RuntimeError)); + } + + // Receive response (server blocks until job completes, then returns result) + std::vector response_data; + if (!client.receive_response(response_data)) { + return mip_solution_t(cuopt::logic_error( + "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); + } + + CUOPT_LOG_DEBUG("[remote_solve] Received MIP result (blocking): {} bytes", + response_data.size()); + + // Deserialize solution from result response (same format as async GET_RESULT) + return serializer->deserialize_mip_result_response(response_data); + + } else { + 
//========================================================================= + // ASYNC MODE: Submit → Poll → Get Result → Delete + //========================================================================= + + std::vector request_data = + serializer->serialize_async_mip_request(view, settings, false /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (async): {} bytes", request_data.size()); + + // Submit job + auto [submit_ok, job_id_or_error] = + submit_job(config.host, config.port, request_data); + if (!submit_ok) { + return mip_solution_t(cuopt::logic_error( + "Job submission failed: " + job_id_or_error, cuopt::error_type_t::RuntimeError)); + } + std::string job_id = job_id_or_error; + CUOPT_LOG_INFO("[remote_solve] Job submitted, ID: {}", job_id); + + // Poll until complete + if (!poll_until_complete(config.host, config.port, job_id, true /* verbose */)) { + delete_job(config.host, config.port, job_id); + return mip_solution_t( + cuopt::logic_error("Job failed or not found", cuopt::error_type_t::RuntimeError)); + } + + // Get result + auto [result_ok, result_data] = get_result(config.host, config.port, job_id); + if (!result_ok) { + delete_job(config.host, config.port, job_id); + return mip_solution_t( + cuopt::logic_error("Failed to retrieve result", cuopt::error_type_t::RuntimeError)); + } + + // Delete job from server + delete_job(config.host, config.port, job_id); + CUOPT_LOG_DEBUG("[remote_solve] Job {} deleted from server", job_id); + + // Deserialize solution from async result response + return serializer->deserialize_mip_result_response(result_data); + } +} + +//============================================================================ +// Cancel Job Remote +//============================================================================ + +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id) +{ + CUOPT_LOG_INFO("[remote_solve] Cancelling job {} on {}:{}", job_id, config.host, 
config.port); + + // Use int32_t, double as the type parameters (doesn't affect cancel logic) + auto result = cancel_job_impl(config.host, config.port, job_id); + + if (result.success) { + CUOPT_LOG_INFO("[remote_solve] Job {} cancelled successfully", job_id); + } else { + CUOPT_LOG_WARN("[remote_solve] Failed to cancel job {}: {}", job_id, result.message); + } + + return result; +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt b/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt new file mode 100644 index 000000000..f5f509089 --- /dev/null +++ b/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt @@ -0,0 +1,55 @@ +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on + +# MsgPack Serializer - demonstrates pluggable serialization +# +# This builds a separate shared library that implements the remote_serializer_t interface +# using MsgPack instead of Protocol Buffers. 
+ +find_package(msgpack-cxx QUIET) + +if(msgpack-cxx_FOUND) + message(STATUS "Building msgpack serializer plugin") + + add_library(cuopt_msgpack_serializer SHARED + msgpack_serializer.cpp + ) + + target_include_directories(cuopt_msgpack_serializer + PRIVATE + "${CMAKE_SOURCE_DIR}/include" + "${CMAKE_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_SOURCE_DIR}/src" + ${CUDA_INCLUDE_DIRS} + ) + + # Required for RMM headers + target_compile_definitions(cuopt_msgpack_serializer + PRIVATE + LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE + ) + + target_link_libraries(cuopt_msgpack_serializer + PRIVATE + msgpack-cxx + rmm::rmm + raft::raft + cuopt + ) + + set_target_properties(cuopt_msgpack_serializer PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + + # Install the plugin + install(TARGETS cuopt_msgpack_serializer + COMPONENT runtime + LIBRARY DESTINATION lib + ) +else() + message(STATUS "msgpack-cxx not found, skipping msgpack serializer plugin") +endif() diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp new file mode 100644 index 000000000..018382df5 --- /dev/null +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -0,0 +1,1455 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * MsgPack-based serializer for cuOpt remote solve. + * This demonstrates the pluggable serialization interface. + * + * NOTE: This is a CPU-only serializer. For solutions on GPU memory, it will + * return empty solution vectors. For production use with GPU memory support, + * convert this to a .cu file and use CUDA memory copy operations. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +// Message type identifiers for sync mode +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; + +// Message type identifiers for async mode +constexpr uint8_t MSG_ASYNC_LP_REQUEST = 10; +constexpr uint8_t MSG_ASYNC_MIP_REQUEST = 11; +constexpr uint8_t MSG_STATUS_REQUEST = 12; +constexpr uint8_t MSG_GET_RESULT = 13; +constexpr uint8_t MSG_DELETE_REQUEST = 14; +constexpr uint8_t MSG_GET_LOGS = 15; +constexpr uint8_t MSG_CANCEL_REQUEST = 16; +constexpr uint8_t MSG_WAIT_REQUEST = 17; + +constexpr uint8_t MSG_SUBMIT_RESPONSE = 20; +constexpr uint8_t MSG_STATUS_RESPONSE = 21; +constexpr uint8_t MSG_LOGS_RESPONSE = 22; +constexpr uint8_t MSG_CANCEL_RESPONSE = 23; + +template +class msgpack_serializer_t : public remote_serializer_t { + public: + msgpack_serializer_t() { std::cout << "[msgpack_serializer] Initialized\n"; } + + ~msgpack_serializer_t() override = default; + + std::string format_name() const override { return "msgpack"; } + + uint32_t protocol_version() const override { return 1; } + + //============================================================================ + // LP Request Serialization + //============================================================================ + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_uint8(MSG_LP_REQUEST); + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack all LP settings (field names match cuOpt API) + pk.pack_map(28); + // Termination tolerances + pk.pack("absolute_gap_tolerance"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("relative_gap_tolerance"); + 
pk.pack(settings.tolerances.relative_gap_tolerance); + pk.pack("primal_infeasible_tolerance"); + pk.pack(settings.tolerances.primal_infeasible_tolerance); + pk.pack("dual_infeasible_tolerance"); + pk.pack(settings.tolerances.dual_infeasible_tolerance); + pk.pack("absolute_dual_tolerance"); + pk.pack(settings.tolerances.absolute_dual_tolerance); + pk.pack("relative_dual_tolerance"); + pk.pack(settings.tolerances.relative_dual_tolerance); + pk.pack("absolute_primal_tolerance"); + pk.pack(settings.tolerances.absolute_primal_tolerance); + pk.pack("relative_primal_tolerance"); + pk.pack(settings.tolerances.relative_primal_tolerance); + // Limits + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("iteration_limit"); + pk.pack(static_cast(settings.iteration_limit)); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("detect_infeasibility"); + pk.pack(settings.detect_infeasibility); + pk.pack("strict_infeasibility"); + pk.pack(settings.strict_infeasibility); + pk.pack("pdlp_solver_mode"); + pk.pack(static_cast(settings.pdlp_solver_mode)); + pk.pack("method"); + pk.pack(static_cast(settings.method)); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("dual_postsolve"); + pk.pack(settings.dual_postsolve); + pk.pack("crossover"); + pk.pack(settings.crossover); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + // Advanced options + pk.pack("per_constraint_residual"); + pk.pack(settings.per_constraint_residual); + pk.pack("cudss_deterministic"); + pk.pack(settings.cudss_deterministic); + pk.pack("folding"); + pk.pack(settings.folding); + pk.pack("augmented"); + pk.pack(settings.augmented); + pk.pack("dualize"); + pk.pack(settings.dualize); + pk.pack("ordering"); + pk.pack(settings.ordering); + pk.pack("barrier_dual_initial_point"); + pk.pack(settings.barrier_dual_initial_point); + pk.pack("eliminate_dense_columns"); + pk.pack(settings.eliminate_dense_columns); + 
pk.pack("save_best_primal_so_far"); + pk.pack(settings.save_best_primal_so_far); + pk.pack("first_primal_feasible"); + pk.pack(settings.first_primal_feasible); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_uint8(MSG_MIP_REQUEST); + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack all MIP settings (field names match cuOpt API) + pk.pack_map(13); + // Limits + pk.pack("time_limit"); + pk.pack(settings.time_limit); + // Tolerances + pk.pack("relative_mip_gap"); + pk.pack(settings.tolerances.relative_mip_gap); + pk.pack("absolute_mip_gap"); + pk.pack(settings.tolerances.absolute_mip_gap); + pk.pack("integrality_tolerance"); + pk.pack(settings.tolerances.integrality_tolerance); + pk.pack("absolute_tolerance"); + pk.pack(settings.tolerances.absolute_tolerance); + pk.pack("relative_tolerance"); + pk.pack(settings.tolerances.relative_tolerance); + pk.pack("presolve_absolute_tolerance"); + pk.pack(settings.tolerances.presolve_absolute_tolerance); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("heuristics_only"); + pk.pack(settings.heuristics_only); + pk.pack("num_cpu_threads"); + pk.pack(settings.num_cpu_threads); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("mip_scaling"); + pk.pack(settings.mip_scaling); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + //============================================================================ + // Solution Deserialization (client-side) + //============================================================================ + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + try { + 
msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size()); + msgpack::object obj = oh.get(); + + if (obj.type != msgpack::type::ARRAY || obj.via.array.size < 6) { + return optimization_problem_solution_t( + cuopt::logic_error("Invalid LP solution format", cuopt::error_type_t::RuntimeError)); + } + + auto& arr = obj.via.array; + auto status = static_cast(arr.ptr[1].as()); + f_t obj_value = arr.ptr[2].as(); + + std::vector primal_sol; + arr.ptr[3].convert(primal_sol); + + std::vector dual_sol; + arr.ptr[4].convert(dual_sol); + + f_t solve_time = arr.ptr[5].as(); + + optimization_problem_solution_t solution(status); + solution.set_primal_objective(obj_value); + solution.set_primal_solution_host(std::move(primal_sol)); + solution.set_dual_solution_host(std::move(dual_sol)); + solution.set_solve_time(solve_time); + + return solution; + } catch (const std::exception& e) { + return optimization_problem_solution_t(cuopt::logic_error( + std::string("MsgPack LP parse error: ") + e.what(), cuopt::error_type_t::RuntimeError)); + } + } + + mip_solution_t deserialize_mip_solution(const std::vector& data) override + { + try { + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size()); + msgpack::object obj = oh.get(); + + if (obj.type != msgpack::type::ARRAY || obj.via.array.size < 6) { + return mip_solution_t( + cuopt::logic_error("Invalid MIP solution format", cuopt::error_type_t::RuntimeError)); + } + + auto& arr = obj.via.array; + auto status = static_cast(arr.ptr[1].as()); + f_t obj_value = arr.ptr[2].as(); + + std::vector sol; + arr.ptr[3].convert(sol); + + f_t solve_time = arr.ptr[4].as(); + f_t mip_gap = arr.ptr[5].as(); + + solver_stats_t stats; + stats.total_solve_time = solve_time; + + mip_solution_t solution(std::move(sol), + std::vector{}, + obj_value, + mip_gap, + status, + 0.0, + 0.0, + 0.0, + stats); + + return solution; + } catch (const std::exception& e) { + return 
mip_solution_t(cuopt::logic_error( + std::string("MsgPack MIP parse error: ") + e.what(), cuopt::error_type_t::RuntimeError)); + } + } + + //============================================================================ + // Server-side Operations + //============================================================================ + + bool is_mip_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { + uint8_t msg_type = oh.get().as(); + // Check both sync and async MIP request types + return msg_type == MSG_MIP_REQUEST || msg_type == MSG_ASYNC_MIP_REQUEST; + } + } catch (...) { + } + return false; + } + + bool deserialize_lp_request(const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + try { + size_t offset = 0; + + msgpack::object_handle oh1 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh1.get().as(); + if (msg_type != MSG_LP_REQUEST) return false; + + msgpack::object_handle oh2 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // version = oh2.get().as(); + + unpack_problem(data, offset, mps_data); + + msgpack::object_handle oh_settings = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto settings_map = oh_settings.get().as>(); + + // Deserialize all LP settings (field names match cuOpt API) + // Termination tolerances + if (settings_map.count("absolute_gap_tolerance")) { + settings.tolerances.absolute_gap_tolerance = + settings_map["absolute_gap_tolerance"].as(); + } + if (settings_map.count("relative_gap_tolerance")) { + settings.tolerances.relative_gap_tolerance = + settings_map["relative_gap_tolerance"].as(); + } + if (settings_map.count("primal_infeasible_tolerance")) { + 
settings.tolerances.primal_infeasible_tolerance = + settings_map["primal_infeasible_tolerance"].as(); + } + if (settings_map.count("dual_infeasible_tolerance")) { + settings.tolerances.dual_infeasible_tolerance = + settings_map["dual_infeasible_tolerance"].as(); + } + if (settings_map.count("absolute_dual_tolerance")) { + settings.tolerances.absolute_dual_tolerance = + settings_map["absolute_dual_tolerance"].as(); + } + if (settings_map.count("relative_dual_tolerance")) { + settings.tolerances.relative_dual_tolerance = + settings_map["relative_dual_tolerance"].as(); + } + if (settings_map.count("absolute_primal_tolerance")) { + settings.tolerances.absolute_primal_tolerance = + settings_map["absolute_primal_tolerance"].as(); + } + if (settings_map.count("relative_primal_tolerance")) { + settings.tolerances.relative_primal_tolerance = + settings_map["relative_primal_tolerance"].as(); + } + // Limits + if (settings_map.count("time_limit")) { + settings.time_limit = settings_map["time_limit"].as(); + } + if (settings_map.count("iteration_limit")) { + settings.iteration_limit = settings_map["iteration_limit"].as(); + } + // Solver configuration + if (settings_map.count("log_to_console")) { + settings.log_to_console = settings_map["log_to_console"].as(); + } + if (settings_map.count("detect_infeasibility")) { + settings.detect_infeasibility = settings_map["detect_infeasibility"].as(); + } + if (settings_map.count("strict_infeasibility")) { + settings.strict_infeasibility = settings_map["strict_infeasibility"].as(); + } + if (settings_map.count("pdlp_solver_mode")) { + settings.pdlp_solver_mode = + static_cast(settings_map["pdlp_solver_mode"].as()); + } + if (settings_map.count("method")) { + settings.method = static_cast(settings_map["method"].as()); + } + if (settings_map.count("presolve")) { + settings.presolve = settings_map["presolve"].as(); + } + if (settings_map.count("dual_postsolve")) { + settings.dual_postsolve = settings_map["dual_postsolve"].as(); + } + if 
(settings_map.count("crossover")) { + settings.crossover = settings_map["crossover"].as(); + } + if (settings_map.count("num_gpus")) { + settings.num_gpus = settings_map["num_gpus"].as(); + } + // Advanced options + if (settings_map.count("per_constraint_residual")) { + settings.per_constraint_residual = settings_map["per_constraint_residual"].as(); + } + if (settings_map.count("cudss_deterministic")) { + settings.cudss_deterministic = settings_map["cudss_deterministic"].as(); + } + if (settings_map.count("folding")) { settings.folding = settings_map["folding"].as(); } + if (settings_map.count("augmented")) { + settings.augmented = settings_map["augmented"].as(); + } + if (settings_map.count("dualize")) { settings.dualize = settings_map["dualize"].as(); } + if (settings_map.count("ordering")) { + settings.ordering = settings_map["ordering"].as(); + } + if (settings_map.count("barrier_dual_initial_point")) { + settings.barrier_dual_initial_point = settings_map["barrier_dual_initial_point"].as(); + } + if (settings_map.count("eliminate_dense_columns")) { + settings.eliminate_dense_columns = settings_map["eliminate_dense_columns"].as(); + } + if (settings_map.count("save_best_primal_so_far")) { + settings.save_best_primal_so_far = settings_map["save_best_primal_so_far"].as(); + } + if (settings_map.count("first_primal_feasible")) { + settings.first_primal_feasible = settings_map["first_primal_feasible"].as(); + } + + return true; + } catch (const std::exception& e) { + std::cerr << "[msgpack_serializer] LP request parse error: " << e.what() << "\n"; + return false; + } + } + + bool deserialize_mip_request(const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + try { + size_t offset = 0; + + msgpack::object_handle oh1 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh1.get().as(); + if (msg_type != MSG_MIP_REQUEST) return false; + + msgpack::object_handle oh2 = + 
msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + + unpack_problem(data, offset, mps_data); + + msgpack::object_handle oh_settings = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto settings_map = oh_settings.get().as>(); + + // Deserialize all MIP settings (field names match cuOpt API) + // Limits + if (settings_map.count("time_limit")) { + settings.time_limit = settings_map["time_limit"].as(); + } + // Tolerances + if (settings_map.count("relative_mip_gap")) { + settings.tolerances.relative_mip_gap = settings_map["relative_mip_gap"].as(); + } + if (settings_map.count("absolute_mip_gap")) { + settings.tolerances.absolute_mip_gap = settings_map["absolute_mip_gap"].as(); + } + if (settings_map.count("integrality_tolerance")) { + settings.tolerances.integrality_tolerance = + settings_map["integrality_tolerance"].as(); + } + if (settings_map.count("absolute_tolerance")) { + settings.tolerances.absolute_tolerance = settings_map["absolute_tolerance"].as(); + } + if (settings_map.count("relative_tolerance")) { + settings.tolerances.relative_tolerance = settings_map["relative_tolerance"].as(); + } + if (settings_map.count("presolve_absolute_tolerance")) { + settings.tolerances.presolve_absolute_tolerance = + settings_map["presolve_absolute_tolerance"].as(); + } + // Solver configuration + if (settings_map.count("log_to_console")) { + settings.log_to_console = settings_map["log_to_console"].as(); + } + if (settings_map.count("heuristics_only")) { + settings.heuristics_only = settings_map["heuristics_only"].as(); + } + if (settings_map.count("num_cpu_threads")) { + settings.num_cpu_threads = settings_map["num_cpu_threads"].as(); + } + if (settings_map.count("num_gpus")) { + settings.num_gpus = settings_map["num_gpus"].as(); + } + if (settings_map.count("presolve")) { + settings.presolve = settings_map["presolve"].as(); + } + if (settings_map.count("mip_scaling")) { + settings.mip_scaling = settings_map["mip_scaling"].as(); + 
} + + return true; + } catch (const std::exception& e) { + std::cerr << "[msgpack_serializer] MIP request parse error: " << e.what() << "\n"; + return false; + } + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Pack as array: [msg_type, status, obj_value, primal_sol, dual_sol, solve_time] + pk.pack_array(6); + pk.pack_uint8(MSG_LP_SOLUTION); + pk.pack(static_cast(solution.get_termination_status())); + pk.pack(static_cast(solution.get_objective_value())); + + // Note: If solution is on GPU, we can't access it from pure C++ code + // For production, this should be a .cu file with CUDA support + if (!solution.is_device_memory()) { + auto primal = solution.get_primal_solution_host(); + pk.pack_array(primal.size()); + for (size_t i = 0; i < primal.size(); ++i) { + pk.pack(static_cast(primal[i])); + } + + auto dual = solution.get_dual_solution_host(); + pk.pack_array(dual.size()); + for (size_t i = 0; i < dual.size(); ++i) { + pk.pack(static_cast(dual[i])); + } + } else { + // GPU memory - return empty arrays (limitation of pure C++ serializer) + pk.pack_array(0); + pk.pack_array(0); + } + + pk.pack(static_cast(solution.get_solve_time())); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_mip_solution(const mip_solution_t& solution) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_array(6); + pk.pack_uint8(MSG_MIP_SOLUTION); + pk.pack(static_cast(solution.get_termination_status())); + pk.pack(static_cast(solution.get_objective_value())); + + if (!solution.is_device_memory()) { + auto sol = solution.get_solution_host(); + pk.pack_array(sol.size()); + for (size_t i = 0; i < sol.size(); ++i) { + pk.pack(static_cast(sol[i])); + } + } else { + pk.pack_array(0); + } + + pk.pack(static_cast(solution.get_stats().total_solve_time)); + 
pk.pack(static_cast(solution.get_mip_gap())); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + //============================================================================ + // Async Operations + //============================================================================ + + std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Header: msg_type, blocking flag, job_id (empty for new submission) + pk.pack_uint8(MSG_ASYNC_LP_REQUEST); + pk.pack(blocking); + pk.pack(std::string("")); // job_id empty for new submission + pk.pack(int64_t(0)); // frombyte (unused for LP requests) + + // Pack the problem and settings + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack all LP settings (field names match cuOpt API) + pk.pack_map(28); + // Termination tolerances + pk.pack("absolute_gap_tolerance"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("relative_gap_tolerance"); + pk.pack(settings.tolerances.relative_gap_tolerance); + pk.pack("primal_infeasible_tolerance"); + pk.pack(settings.tolerances.primal_infeasible_tolerance); + pk.pack("dual_infeasible_tolerance"); + pk.pack(settings.tolerances.dual_infeasible_tolerance); + pk.pack("absolute_dual_tolerance"); + pk.pack(settings.tolerances.absolute_dual_tolerance); + pk.pack("relative_dual_tolerance"); + pk.pack(settings.tolerances.relative_dual_tolerance); + pk.pack("absolute_primal_tolerance"); + pk.pack(settings.tolerances.absolute_primal_tolerance); + pk.pack("relative_primal_tolerance"); + pk.pack(settings.tolerances.relative_primal_tolerance); + // Limits + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("iteration_limit"); + pk.pack(static_cast(settings.iteration_limit)); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + 
pk.pack("detect_infeasibility"); + pk.pack(settings.detect_infeasibility); + pk.pack("strict_infeasibility"); + pk.pack(settings.strict_infeasibility); + pk.pack("pdlp_solver_mode"); + pk.pack(static_cast(settings.pdlp_solver_mode)); + pk.pack("method"); + pk.pack(static_cast(settings.method)); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("dual_postsolve"); + pk.pack(settings.dual_postsolve); + pk.pack("crossover"); + pk.pack(settings.crossover); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + // Advanced options + pk.pack("per_constraint_residual"); + pk.pack(settings.per_constraint_residual); + pk.pack("cudss_deterministic"); + pk.pack(settings.cudss_deterministic); + pk.pack("folding"); + pk.pack(settings.folding); + pk.pack("augmented"); + pk.pack(settings.augmented); + pk.pack("dualize"); + pk.pack(settings.dualize); + pk.pack("ordering"); + pk.pack(settings.ordering); + pk.pack("barrier_dual_initial_point"); + pk.pack(settings.barrier_dual_initial_point); + pk.pack("eliminate_dense_columns"); + pk.pack(settings.eliminate_dense_columns); + pk.pack("save_best_primal_so_far"); + pk.pack(settings.save_best_primal_so_far); + pk.pack("first_primal_feasible"); + pk.pack(settings.first_primal_feasible); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Header: msg_type, blocking flag, job_id (empty for new submission) + pk.pack_uint8(MSG_ASYNC_MIP_REQUEST); + pk.pack(blocking); + pk.pack(std::string("")); // job_id empty for new submission + pk.pack(int64_t(0)); // frombyte (unused for MIP requests) + + // Pack the problem and settings + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack all MIP settings (field names match cuOpt API) + pk.pack_map(13); + // Limits + 
pk.pack("time_limit"); + pk.pack(settings.time_limit); + // Tolerances + pk.pack("relative_mip_gap"); + pk.pack(settings.tolerances.relative_mip_gap); + pk.pack("absolute_mip_gap"); + pk.pack(settings.tolerances.absolute_mip_gap); + pk.pack("integrality_tolerance"); + pk.pack(settings.tolerances.integrality_tolerance); + pk.pack("absolute_tolerance"); + pk.pack(settings.tolerances.absolute_tolerance); + pk.pack("relative_tolerance"); + pk.pack(settings.tolerances.relative_tolerance); + pk.pack("presolve_absolute_tolerance"); + pk.pack(settings.tolerances.presolve_absolute_tolerance); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("heuristics_only"); + pk.pack(settings.heuristics_only); + pk.pack("num_cpu_threads"); + pk.pack(settings.num_cpu_threads); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("mip_scaling"); + pk.pack(settings.mip_scaling); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_status_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_STATUS_REQUEST); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_get_result_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_GET_RESULT); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_delete_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_DELETE_REQUEST); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + 
pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_get_logs_request(const std::string& job_id, + int64_t frombyte = 0) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_GET_LOGS); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(frombyte); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_cancel_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_CANCEL_REQUEST); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + bool deserialize_submit_response(const std::vector& data, + std::string& job_id, + std::string& error_message) override + { + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_SUBMIT_RESPONSE) { + error_message = "Invalid response type"; + return false; + } + + msgpack::object_handle oh_success = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + bool success = oh_success.get().as(); + + msgpack::object_handle oh_job_id = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + job_id = oh_job_id.get().as(); + + if (!success) { + msgpack::object_handle oh_err = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + error_message = oh_err.get().as(); + } + + return success; + } catch (const std::exception& e) { + error_message = std::string("MsgPack parse error: ") + e.what(); + return false; + } + } + + typename remote_serializer_t::job_status_t deserialize_status_response( + const std::vector& data) override + { + using job_status_t = typename 
remote_serializer_t::job_status_t; + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_STATUS_RESPONSE) { return job_status_t::NOT_FOUND; } + + msgpack::object_handle oh_status = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + int status = oh_status.get().as(); + + // Status codes: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND, 5=CANCELLED + switch (status) { + case 0: return job_status_t::QUEUED; + case 1: return job_status_t::PROCESSING; + case 2: return job_status_t::COMPLETED; + case 3: return job_status_t::FAILED; + case 4: return job_status_t::NOT_FOUND; + case 5: return job_status_t::CANCELLED; + default: return job_status_t::NOT_FOUND; + } + } catch (...) { + return job_status_t::NOT_FOUND; + } + } + + typename remote_serializer_t::logs_result_t deserialize_logs_response( + const std::vector& data) override + { + typename remote_serializer_t::logs_result_t result; + result.nbytes = 0; + result.job_exists = false; + + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_LOGS_RESPONSE) { return result; } + + msgpack::object_handle oh_exists = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.job_exists = oh_exists.get().as(); + + msgpack::object_handle oh_nbytes = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.nbytes = oh_nbytes.get().as(); + + msgpack::object_handle oh_lines = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto lines_array = oh_lines.get().as>(); + result.log_lines = std::move(lines_array); + } catch (...) 
{ + } + return result; + } + + typename remote_serializer_t::cancel_result_t deserialize_cancel_response( + const std::vector& data) override + { + using job_status_t = typename remote_serializer_t::job_status_t; + typename remote_serializer_t::cancel_result_t result; + result.success = false; + result.message = "Failed to parse response"; + result.job_status = job_status_t::NOT_FOUND; + + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_CANCEL_RESPONSE) { return result; } + + msgpack::object_handle oh_success = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.success = oh_success.get().as(); + + msgpack::object_handle oh_message = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.message = oh_message.get().as(); + + msgpack::object_handle oh_status = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + int status_code = oh_status.get().as(); + + switch (status_code) { + case 0: result.job_status = job_status_t::QUEUED; break; + case 1: result.job_status = job_status_t::PROCESSING; break; + case 2: result.job_status = job_status_t::COMPLETED; break; + case 3: result.job_status = job_status_t::FAILED; break; + case 4: result.job_status = job_status_t::NOT_FOUND; break; + case 5: result.job_status = job_status_t::CANCELLED; break; + default: result.job_status = job_status_t::NOT_FOUND; break; + } + } catch (...) 
{ + } + return result; + } + + optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) override + { + return deserialize_lp_solution(data); + } + + mip_solution_t deserialize_mip_result_response( + const std::vector& data) override + { + return deserialize_mip_solution(data); + } + + //============================================================================ + // Server-side async request detection + //============================================================================ + + bool is_async_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { + uint8_t msg_type = oh.get().as(); + return msg_type >= MSG_ASYNC_LP_REQUEST && msg_type <= MSG_CANCEL_REQUEST; + } + } catch (...) { + } + return false; + } + + bool is_blocking_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + // Skip msg_type + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + + // Read blocking flag + msgpack::object_handle oh_blocking = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_blocking.get().as(); + } catch (...) 
{ + } + return false; + } + + std::vector extract_problem_data(const std::vector& data) override + { + // For msgpack, we extract the problem portion by repacking + // The full request contains: msg_type, blocking, job_id, frombyte, version, problem, settings + // We need to return a sync-style request: msg_type, version, problem, settings + if (data.empty()) return {}; + + try { + size_t offset = 0; + + // Read header + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + // Map async type to sync type + uint8_t sync_type = (msg_type == MSG_ASYNC_MIP_REQUEST) ? MSG_MIP_REQUEST : MSG_LP_REQUEST; + + // Skip blocking, job_id, frombyte + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // job_id + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // frombyte + + // Create sync-style request: msg_type, then rest of data (version, problem, settings) + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(sync_type); + + // Append the rest of the data (version, problem, settings) + std::vector result(buffer.data(), buffer.data() + buffer.size()); + result.insert(result.end(), data.begin() + offset, data.end()); + return result; + + } catch (...) { + } + return {}; + } + + int64_t get_frombyte(const std::vector& data) override + { + if (data.empty()) return 0; + try { + size_t offset = 0; + // Skip msg_type + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip job_id + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Read frombyte + msgpack::object_handle oh_frombyte = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_frombyte.get().as(); + } catch (...) 
{ + } + return 0; + } + + int get_async_request_type(const std::vector& data) override + { + if (data.empty()) return -1; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh.get().as(); + + // Map msgpack types to the RequestType enum expected by server + switch (msg_type) { + case MSG_ASYNC_LP_REQUEST: + case MSG_ASYNC_MIP_REQUEST: return 0; // SUBMIT_JOB + case MSG_STATUS_REQUEST: return 1; // CHECK_STATUS + case MSG_GET_RESULT: return 2; // GET_RESULT + case MSG_DELETE_REQUEST: return 3; // DELETE_RESULT + case MSG_GET_LOGS: return 4; // GET_LOGS + case MSG_CANCEL_REQUEST: return 5; // CANCEL_JOB + case MSG_WAIT_REQUEST: return 6; // WAIT_FOR_RESULT + default: return -1; + } + } catch (...) { + } + return -1; + } + + std::string get_job_id(const std::vector& data) override + { + if (data.empty()) return ""; + try { + size_t offset = 0; + // Skip msg_type + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Read job_id + msgpack::object_handle oh_job_id = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_job_id.get().as(); + } catch (...) 
{ + } + return ""; + } + + //============================================================================ + // Server-side response serialization + //============================================================================ + + std::vector serialize_submit_response(bool success, const std::string& result) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_SUBMIT_RESPONSE); + pk.pack(success); + pk.pack(result); // job_id on success, error message on failure + if (!success) { pk.pack(result); } // error message duplicated for compatibility + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_status_response(int status_code, + const std::string& message) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_STATUS_RESPONSE); + pk.pack(status_code); + pk.pack(message); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message, + bool is_mip = false) override + { + // For result response, we prepend success flag then the actual solution data + // Note: is_mip flag is not needed here as msgpack just passes through raw data + (void)is_mip; // Unused but required for interface compatibility + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack(success); + pk.pack(error_message); + // Append raw solution data + std::vector response(buffer.data(), buffer.data() + buffer.size()); + response.insert(response.end(), result_data.begin(), result_data.end()); + return response; + } + + std::vector serialize_delete_response(bool success) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack(success); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t 
nbytes, + bool job_exists) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_LOGS_RESPONSE); + pk.pack(job_exists); + pk.pack(nbytes); + pk.pack(log_lines); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_CANCEL_RESPONSE); + pk.pack(success); + pk.pack(message); + pk.pack(status_code); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + private: + void pack_problem(msgpack::packer& pk, + const mps_parser::data_model_view_t& view) + { + // Field names match data_model_view_t (without trailing underscore) + auto offsets_span = view.get_constraint_matrix_offsets(); + auto values_span = view.get_constraint_matrix_values(); + auto obj_span = view.get_objective_coefficients(); + + // Count fields: base 17, plus optional fields + int num_fields = 17; + auto init_primal_span = view.get_initial_primal_solution(); + auto init_dual_span = view.get_initial_dual_solution(); + const auto& var_names = view.get_variable_names(); + const auto& row_names_vec = view.get_row_names(); + if (!var_names.empty()) num_fields++; + if (!row_names_vec.empty()) num_fields++; + if (init_primal_span.size() > 0) num_fields++; + if (init_dual_span.size() > 0) num_fields++; + if (view.has_quadratic_objective()) num_fields += 3; + + pk.pack_map(num_fields); + + // Problem metadata + pk.pack("problem_name"); + pk.pack(view.get_problem_name()); + pk.pack("objective_name"); + pk.pack(view.get_objective_name()); + pk.pack("maximize"); + pk.pack(view.get_sense()); + pk.pack("objective_scaling_factor"); + pk.pack(static_cast(view.get_objective_scaling_factor())); + pk.pack("objective_offset"); + pk.pack(static_cast(view.get_objective_offset())); + + // Variable and row names (optional, matches protobuf) + if 
(!var_names.empty()) { + pk.pack("variable_names"); + pk.pack_array(var_names.size()); + for (const auto& name : var_names) { + pk.pack(name); + } + } + if (!row_names_vec.empty()) { + pk.pack("row_names"); + pk.pack_array(row_names_vec.size()); + for (const auto& name : row_names_vec) { + pk.pack(name); + } + } + + // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) + pk.pack("A"); + pk.pack_array(values_span.size()); + for (size_t i = 0; i < values_span.size(); ++i) { + pk.pack(static_cast(values_span.data()[i])); + } + + pk.pack("A_indices"); + auto A_idx = view.get_constraint_matrix_indices(); + pk.pack_array(A_idx.size()); + for (size_t i = 0; i < A_idx.size(); ++i) { + pk.pack(static_cast(A_idx.data()[i])); + } + + pk.pack("A_offsets"); + pk.pack_array(offsets_span.size()); + for (size_t i = 0; i < offsets_span.size(); ++i) { + pk.pack(static_cast(offsets_span.data()[i])); + } + + // Objective coefficients c (name matches data_model_view_t: c_) + pk.pack("c"); + pk.pack_array(obj_span.size()); + for (size_t i = 0; i < obj_span.size(); ++i) { + pk.pack(static_cast(obj_span.data()[i])); + } + + // Variable bounds + pk.pack("variable_lower_bounds"); + auto vlb = view.get_variable_lower_bounds(); + pk.pack_array(vlb.size()); + for (size_t i = 0; i < vlb.size(); ++i) { + pk.pack(static_cast(vlb.data()[i])); + } + + pk.pack("variable_upper_bounds"); + auto vub = view.get_variable_upper_bounds(); + pk.pack_array(vub.size()); + for (size_t i = 0; i < vub.size(); ++i) { + pk.pack(static_cast(vub.data()[i])); + } + + // Constraint bounds b (RHS) + pk.pack("b"); + auto b_span = view.get_constraint_bounds(); + pk.pack_array(b_span.size()); + for (size_t i = 0; i < b_span.size(); ++i) { + pk.pack(static_cast(b_span.data()[i])); + } + + // Row types + pk.pack("row_types"); + auto rt_span = view.get_row_types(); + pk.pack(std::string(rt_span.data(), rt_span.size())); + + // Constraint lower/upper bounds + 
pk.pack("constraint_lower_bounds"); + auto clb = view.get_constraint_lower_bounds(); + if (clb.size() > 0) { + pk.pack_array(clb.size()); + for (size_t i = 0; i < clb.size(); ++i) { + pk.pack(static_cast(clb.data()[i])); + } + } else { + pk.pack_array(0); + } + + pk.pack("constraint_upper_bounds"); + auto cub = view.get_constraint_upper_bounds(); + if (cub.size() > 0) { + pk.pack_array(cub.size()); + for (size_t i = 0; i < cub.size(); ++i) { + pk.pack(static_cast(cub.data()[i])); + } + } else { + pk.pack_array(0); + } + + // Variable types (name matches data_model_view_t: variable_types_) + pk.pack("variable_types"); + auto vt = view.get_variable_types(); + pk.pack(std::string(vt.data(), vt.size())); + + // Initial solutions (if available) + if (init_primal_span.size() > 0) { + pk.pack("initial_primal_solution"); + pk.pack_array(init_primal_span.size()); + for (size_t i = 0; i < init_primal_span.size(); ++i) { + pk.pack(static_cast(init_primal_span.data()[i])); + } + } + + if (init_dual_span.size() > 0) { + pk.pack("initial_dual_solution"); + pk.pack_array(init_dual_span.size()); + for (size_t i = 0; i < init_dual_span.size(); ++i) { + pk.pack(static_cast(init_dual_span.data()[i])); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (view.has_quadratic_objective()) { + pk.pack("Q_values"); + auto q_vals = view.get_quadratic_objective_values(); + pk.pack_array(q_vals.size()); + for (size_t i = 0; i < q_vals.size(); ++i) { + pk.pack(static_cast(q_vals.data()[i])); + } + + pk.pack("Q_indices"); + auto q_idx = view.get_quadratic_objective_indices(); + pk.pack_array(q_idx.size()); + for (size_t i = 0; i < q_idx.size(); ++i) { + pk.pack(static_cast(q_idx.data()[i])); + } + + pk.pack("Q_offsets"); + auto q_off = view.get_quadratic_objective_offsets(); + pk.pack_array(q_off.size()); + for (size_t i = 0; i < q_off.size(); ++i) { + pk.pack(static_cast(q_off.data()[i])); + } + } + } + + void unpack_problem(const std::vector& data, + size_t& offset, + 
mps_parser::mps_data_model_t& mps_data) + { + // Field names match data_model_view_t (without trailing underscore) + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto problem_map = oh.get().as>(); + + // Problem metadata + if (problem_map.count("problem_name")) { + mps_data.set_problem_name(problem_map["problem_name"].as()); + } + if (problem_map.count("objective_name")) { + mps_data.set_objective_name(problem_map["objective_name"].as()); + } + if (problem_map.count("maximize")) { + mps_data.set_maximize(problem_map["maximize"].as()); + } + if (problem_map.count("objective_scaling_factor")) { + mps_data.set_objective_scaling_factor(problem_map["objective_scaling_factor"].as()); + } + if (problem_map.count("objective_offset")) { + mps_data.set_objective_offset(problem_map["objective_offset"].as()); + } + + // Constraint matrix A in CSR format + std::vector A_values; + problem_map["A"].convert(A_values); + std::vector A_indices; + problem_map["A_indices"].convert(A_indices); + std::vector A_offsets; + problem_map["A_offsets"].convert(A_offsets); + + mps_data.set_csr_constraint_matrix(A_values.data(), + static_cast(A_values.size()), + A_indices.data(), + static_cast(A_indices.size()), + A_offsets.data(), + static_cast(A_offsets.size())); + + // Objective coefficients c + std::vector c; + problem_map["c"].convert(c); + mps_data.set_objective_coefficients(c.data(), static_cast(c.size())); + + // Variable and row names (optional) + if (problem_map.count("variable_names")) { + std::vector var_names; + problem_map["variable_names"].convert(var_names); + if (!var_names.empty()) { mps_data.set_variable_names(var_names); } + } + if (problem_map.count("row_names")) { + std::vector row_names; + problem_map["row_names"].convert(row_names); + if (!row_names.empty()) { mps_data.set_row_names(row_names); } + } + + // Variable bounds + std::vector var_lb, var_ub; + problem_map["variable_lower_bounds"].convert(var_lb); + 
problem_map["variable_upper_bounds"].convert(var_ub); + mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); + mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); + + // Constraint bounds (prefer lower/upper bounds if available) + if (problem_map.count("constraint_lower_bounds")) { + std::vector con_lb; + problem_map["constraint_lower_bounds"].convert(con_lb); + if (con_lb.size() > 0) { + std::vector con_ub; + problem_map["constraint_upper_bounds"].convert(con_ub); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + } + } + + // Constraint bounds b (RHS) + row_types format + if (problem_map.count("b")) { + std::vector b; + problem_map["b"].convert(b); + if (b.size() > 0) { mps_data.set_constraint_bounds(b.data(), static_cast(b.size())); } + } + + if (problem_map.count("row_types")) { + std::string row_types_str = problem_map["row_types"].as(); + if (!row_types_str.empty()) { + mps_data.set_row_types(row_types_str.data(), static_cast(row_types_str.size())); + } + } + + // Variable types (stored as string, matching data_model_view_t) + if (problem_map.count("variable_types")) { + std::string var_types_str = problem_map["variable_types"].as(); + if (!var_types_str.empty()) { + std::vector var_types(var_types_str.begin(), var_types_str.end()); + mps_data.set_variable_types(var_types); + } + } + + // Initial solutions (if provided) + if (problem_map.count("initial_primal_solution")) { + std::vector init_primal; + problem_map["initial_primal_solution"].convert(init_primal); + if (init_primal.size() > 0) { + mps_data.set_initial_primal_solution(init_primal.data(), + static_cast(init_primal.size())); + } + } + + if (problem_map.count("initial_dual_solution")) { + std::vector init_dual; + problem_map["initial_dual_solution"].convert(init_dual); + if (init_dual.size() > 0) { + 
mps_data.set_initial_dual_solution(init_dual.data(), static_cast(init_dual.size())); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (problem_map.count("Q_values")) { + std::vector Q_values; + std::vector Q_indices; + std::vector Q_offsets; + problem_map["Q_values"].convert(Q_values); + problem_map["Q_indices"].convert(Q_indices); + problem_map["Q_offsets"].convert(Q_offsets); + + if (Q_values.size() > 0) { + mps_data.set_quadratic_objective_matrix(Q_values.data(), + static_cast(Q_values.size()), + Q_indices.data(), + static_cast(Q_indices.size()), + Q_offsets.data(), + static_cast(Q_offsets.size())); + } + } + } +}; + +} // namespace cuopt::linear_programming + +//============================================================================ +// Factory Functions (exported for dynamic loading) +//============================================================================ + +extern "C" { + +std::unique_ptr> +create_cuopt_serializer_i32_f64() +{ + return std::make_unique>(); +} + +} // extern "C" diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index e6a392d40..1886a4418 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -31,6 +31,8 @@ #include +#include + #include #include #include @@ -293,13 +295,271 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } } +// Helper to create a data_model_view_t from mps_data_model_t (for remote solve path) +template +static data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + 
mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if 
(!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + return view; +} + +// Helper struct to hold CPU copies of GPU data for remote solve +template +struct cpu_problem_data_t { + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + std::vector constraint_bounds; + std::vector constraint_lower_bounds; + std::vector constraint_upper_bounds; + std::vector objective_coefficients; + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + std::vector variable_types; + std::vector quadratic_objective_values; + std::vector quadratic_objective_indices; + std::vector quadratic_objective_offsets; + bool maximize; + f_t objective_scaling_factor; + f_t objective_offset; + + data_model_view_t create_view() const + { + data_model_view_t v; + v.set_maximize(maximize); + v.set_objective_scaling_factor(objective_scaling_factor); + v.set_objective_offset(objective_offset); + + if (!A_values.empty()) { + v.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + if (!constraint_bounds.empty()) { + v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) { + v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size()); + 
v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size()); + } + if (!objective_coefficients.empty()) { + v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + if (!variable_lower_bounds.empty()) { + v.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + if (!variable_upper_bounds.empty()) { + v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + if (!variable_types.empty()) { + v.set_variable_types(variable_types.data(), variable_types.size()); + } + if (!quadratic_objective_values.empty()) { + v.set_quadratic_objective_matrix(quadratic_objective_values.data(), + quadratic_objective_values.size(), + quadratic_objective_indices.data(), + quadratic_objective_indices.size(), + quadratic_objective_offsets.data(), + quadratic_objective_offsets.size()); + } + v.set_is_device_memory(false); + return v; + } +}; + +// Helper to copy GPU view data to CPU +template +cpu_problem_data_t copy_view_to_cpu(raft::handle_t const* handle_ptr, + const data_model_view_t& gpu_view) +{ + cpu_problem_data_t cpu_data; + auto stream = handle_ptr->get_stream(); + + cpu_data.maximize = gpu_view.get_sense(); + cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor(); + cpu_data.objective_offset = gpu_view.get_objective_offset(); + + auto copy_to_host = [stream](auto& dst_vec, auto src_span) { + if (src_span.size() > 0) { + dst_vec.resize(src_span.size()); + raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream); + } + }; + + copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values()); + copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices()); + copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets()); + copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds()); + copy_to_host(cpu_data.constraint_lower_bounds, 
gpu_view.get_constraint_lower_bounds()); + copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds()); + copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients()); + copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds()); + copy_to_host(cpu_data.variable_upper_bounds, gpu_view.get_variable_upper_bounds()); + copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values()); + copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices()); + copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets()); + + // Variable types need special handling (char array) + auto var_types_span = gpu_view.get_variable_types(); + if (var_types_span.size() > 0) { + cpu_data.variable_types.resize(var_types_span.size()); + cudaMemcpyAsync(cpu_data.variable_types.data(), + var_types_span.data(), + var_types_span.size() * sizeof(char), + cudaMemcpyDeviceToHost, + stream); + } + + // Synchronize to ensure all copies are complete + cudaStreamSynchronize(stream); + + return cpu_data; +} + template mip_solution_t solve_mip( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, mip_solver_settings_t const& settings) { - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); + // Create a view pointing to CPU data and delegate to the view-based overload. + // The view overload handles local vs remote solve automatically. 
+ auto view = create_view_from_mps_data_model(mps_data_model); + view.set_is_device_memory(false); // MPS data is always in CPU memory + return solve_mip(handle_ptr, view, settings); +} + +template +mip_solution_t solve_mip(raft::handle_t const* handle_ptr, + const data_model_view_t& view, + mip_solver_settings_t const& settings) +{ + // Initialize logger for this overload (needed for early returns) + init_logger_t log(settings.log_file, settings.log_to_console); + + // Check for remote solve configuration first + auto remote_config = get_remote_solve_config(); + + if (view.is_device_memory()) { + if (remote_config.has_value()) { + // GPU data + remote solve requested: need valid handle to copy GPU→CPU + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR( + "[solve_mip] Remote solve requested with GPU data but no CUDA handle. " + "This is an internal error - GPU data should not exist without CUDA initialization."); + return mip_solution_t( + cuopt::logic_error("No CUDA handle for GPU data", cuopt::error_type_t::RuntimeError)); + } + CUOPT_LOG_WARN( + "[solve_mip] Remote solve requested but data is on GPU. 
" + "Copying to CPU for serialization (performance impact)."); + auto cpu_data = copy_view_to_cpu(handle_ptr, view); + auto cpu_view = cpu_data.create_view(); + + CUOPT_LOG_INFO( + "[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Remote solve with GPU data - serialize cpu_view and send to remote server + return solve_mip_remote(*remote_config, cpu_view, settings); + } + + // Local solve: data already on GPU - convert view to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_mip(op_problem, settings); + } + + // Data is on CPU + if (remote_config.has_value()) { + CUOPT_LOG_INFO("[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Remote solve with CPU data - serialize view and send to remote server + return solve_mip_remote(*remote_config, view, settings); + } + + // Local solve with CPU data: copy to GPU and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); return solve_mip(op_problem, settings); } @@ -311,6 +571,11 @@ mip_solution_t solve_mip( template mip_solution_t solve_mip( \ raft::handle_t const* handle_ptr, \ const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ + mip_solver_settings_t const& settings); \ + \ + template mip_solution_t solve_mip( \ + raft::handle_t const* handle_ptr, \ + const data_model_view_t& view, \ mip_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 2ce6d5700..fd8707732 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -28,7 +28,8 @@ mip_solution_t::mip_solution_t(rmm::device_uvector solution, f_t max_variable_bound_violation, solver_stats_t stats, std::vector> solution_pool) - : solution_(std::move(solution)), + : solution_(std::make_unique>(std::move(solution))), + is_device_memory_(true), var_names_(std::move(var_names)), objective_(objective), mip_gap_(mip_gap), @@ -46,7 +47,8 @@ template mip_solution_t::mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats, rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), + : solution_(std::make_unique>(0, stream_view)), + is_device_memory_(true), objective_(0), mip_gap_(0), termination_status_(termination_status), @@ -61,7 +63,65 @@ mip_solution_t::mip_solution_t(mip_termination_status_t termination_st template mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), + : solution_(std::make_unique>(0, stream_view)), + is_device_memory_(true), + objective_(0), + mip_gap_(0), + termination_status_(mip_termination_status_t::NoTermination), + max_constraint_violation_(0), + max_int_violation_(0), + max_variable_bound_violation_(0), + error_status_(error_status) +{ +} + +// CPU-only constructor for remote solve with solution data +template +mip_solution_t::mip_solution_t(std::vector solution, + std::vector var_names, + f_t objective, + f_t mip_gap, + mip_termination_status_t termination_status, + f_t max_constraint_violation, + f_t max_int_violation, + f_t max_variable_bound_violation, + solver_stats_t stats) + : solution_host_(std::make_unique>(std::move(solution))), + is_device_memory_(false), + var_names_(std::move(var_names)), + objective_(objective), + mip_gap_(mip_gap), + termination_status_(termination_status), + 
max_constraint_violation_(max_constraint_violation), + max_int_violation_(max_int_violation), + max_variable_bound_violation_(max_variable_bound_violation), + stats_(stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +mip_solution_t::mip_solution_t(mip_termination_status_t termination_status, + solver_stats_t stats) + : solution_host_(std::make_unique>()), + is_device_memory_(false), + objective_(0), + mip_gap_(0), + termination_status_(termination_status), + max_constraint_violation_(0), + max_int_violation_(0), + max_variable_bound_violation_(0), + stats_(stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status) + : solution_host_(std::make_unique>()), + is_device_memory_(false), objective_(0), mip_gap_(0), termination_status_(mip_termination_status_t::NoTermination), @@ -78,16 +138,34 @@ const cuopt::logic_error& mip_solution_t::get_error_status() const return error_status_; } +template +bool mip_solution_t::is_device_memory() const +{ + return is_device_memory_; +} + template const rmm::device_uvector& mip_solution_t::get_solution() const { - return solution_; + return *solution_; } template rmm::device_uvector& mip_solution_t::get_solution() { - return solution_; + return *solution_; +} + +template +std::vector& mip_solution_t::get_solution_host() +{ + return *solution_host_; +} + +template +const std::vector& mip_solution_t::get_solution_host() const +{ + return *solution_host_; } template @@ -211,9 +289,16 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, double objective_value = get_objective_value(); auto& var_names = get_variable_names(); std::vector solution; - solution.resize(solution_.size()); - raft::copy(solution.data(), solution_.data(), solution_.size(), 
stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + solution.resize(solution_->size()); + raft::copy(solution.data(), solution_->data(), solution_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + solution = *solution_host_; + } solution_writer_t::write_solution_to_sol_file( std::string(filename), status, objective_value, var_names, solution); @@ -233,6 +318,121 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } +//============================================================================ +// Setters for remote solve deserialization +//============================================================================ + +template +void mip_solution_t::set_solution_host(std::vector solution) +{ + solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void mip_solution_t::set_objective(f_t value) +{ + objective_ = value; +} + +template +void mip_solution_t::set_mip_gap(f_t value) +{ + mip_gap_ = value; +} + +template +void mip_solution_t::set_solution_bound(f_t value) +{ + stats_.solution_bound = value; +} + +template +void mip_solution_t::set_total_solve_time(double value) +{ + stats_.total_solve_time = value; +} + +template +void mip_solution_t::set_presolve_time(double value) +{ + stats_.presolve_time = value; +} + +template +void mip_solution_t::set_max_constraint_violation(f_t value) +{ + max_constraint_violation_ = value; +} + +template +void mip_solution_t::set_max_int_violation(f_t value) +{ + max_int_violation_ = value; +} + +template +void mip_solution_t::set_max_variable_bound_violation(f_t value) +{ + max_variable_bound_violation_ = value; +} + +template +void mip_solution_t::set_nodes(i_t value) +{ + stats_.num_nodes = value; +} + +template +void mip_solution_t::set_simplex_iterations(i_t value) 
+{ + stats_.num_simplex_iterations = value; +} + +template +std::string mip_solution_t::get_error_string() const +{ + return error_status_.what(); +} + +template +i_t mip_solution_t::get_nodes() const +{ + return stats_.num_nodes; +} + +template +i_t mip_solution_t::get_simplex_iterations() const +{ + return stats_.num_simplex_iterations; +} + +template +void mip_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (!is_device_memory_) { + // Already on CPU, nothing to do + return; + } + + // Initialize host storage if needed + if (!solution_host_) { solution_host_ = std::make_unique>(); } + + // Copy solution + if (solution_ && solution_->size() > 0) { + solution_host_->resize(solution_->size()); + raft::copy(solution_host_->data(), solution_->data(), solution_->size(), stream_view.value()); + + // Synchronize to ensure copy is complete + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } + + // Clear GPU storage to free memory + solution_.reset(); + + is_device_memory_ = false; +} + #if MIP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/tests/linear_programming/CMakeLists.txt b/cpp/tests/linear_programming/CMakeLists.txt index c091751f9..11ea6fd1f 100644 --- a/cpp/tests/linear_programming/CMakeLists.txt +++ b/cpp/tests/linear_programming/CMakeLists.txt @@ -6,6 +6,7 @@ ConfigureTest(LP_UNIT_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/optimization_problem_test.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solver_settings_test.cu + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/remote_solve_test.cu )# ################################################################################################## # - Linear programming PDLP tests ---------------------------------------------------------------------- ConfigureTest(PDLP_TEST diff --git a/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu b/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu new file mode 100644 index 000000000..08cfa7342 --- /dev/null +++ 
b/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu @@ -0,0 +1,267 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace cuopt::linear_programming { + +// Test fixture that manages environment variables +class RemoteSolveTest : public ::testing::Test { + protected: + void SetUp() override + { + // Save original env vars if they exist + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + original_host_ = host ? host : ""; + original_port_ = port ? port : ""; + host_was_set_ = (host != nullptr); + port_was_set_ = (port != nullptr); + } + + void TearDown() override + { + // Restore original env vars + if (host_was_set_) { + setenv("CUOPT_REMOTE_HOST", original_host_.c_str(), 1); + } else { + unsetenv("CUOPT_REMOTE_HOST"); + } + if (port_was_set_) { + setenv("CUOPT_REMOTE_PORT", original_port_.c_str(), 1); + } else { + unsetenv("CUOPT_REMOTE_PORT"); + } + } + + void enable_remote_solve() + { + setenv("CUOPT_REMOTE_HOST", "localhost", 1); + setenv("CUOPT_REMOTE_PORT", "5000", 1); + } + + void disable_remote_solve() + { + unsetenv("CUOPT_REMOTE_HOST"); + unsetenv("CUOPT_REMOTE_PORT"); + } + + private: + std::string original_host_; + std::string original_port_; + bool host_was_set_; + bool port_was_set_; +}; + +// White-box test: GPU data + remote solve enabled +// This tests the edge case where data is on GPU but user wants remote solve +TEST_F(RemoteSolveTest, gpu_data_with_remote_solve_enabled) +{ + const raft::handle_t handle_{}; + + // Create a simple 2x2 LP problem directly on GPU + // minimize: 0.2*x1 + 0.1*x2 + // subject to: 3*x1 + 4*x2 <= 5.4 + // 2.7*x1 + 10.1*x2 <= 4.9 + // x1, x2 >= 0 + + // Allocate GPU memory for problem data + 
rmm::device_uvector A_values(4, handle_.get_stream()); + rmm::device_uvector A_indices(4, handle_.get_stream()); + rmm::device_uvector A_offsets(3, handle_.get_stream()); + rmm::device_uvector constraint_bounds(2, handle_.get_stream()); + rmm::device_uvector objective_coeffs(2, handle_.get_stream()); + rmm::device_uvector var_lower(2, handle_.get_stream()); + rmm::device_uvector var_upper(2, handle_.get_stream()); + + // Host data + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_bounds = {5.4, 4.9}; + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + // Copy to GPU + raft::copy(A_values.data(), h_A_values.data(), 4, handle_.get_stream()); + raft::copy(A_indices.data(), h_A_indices.data(), 4, handle_.get_stream()); + raft::copy(A_offsets.data(), h_A_offsets.data(), 3, handle_.get_stream()); + raft::copy(constraint_bounds.data(), h_bounds.data(), 2, handle_.get_stream()); + raft::copy(objective_coeffs.data(), h_obj.data(), 2, handle_.get_stream()); + raft::copy(var_lower.data(), h_var_lower.data(), 2, handle_.get_stream()); + raft::copy(var_upper.data(), h_var_upper.data(), 2, handle_.get_stream()); + handle_.sync_stream(); + + // Create a data_model_view_t pointing to GPU data + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix(A_values.data(), 4, A_indices.data(), 4, A_offsets.data(), 3); + view.set_constraint_bounds(constraint_bounds.data(), 2); + view.set_objective_coefficients(objective_coeffs.data(), 2); + view.set_variable_lower_bounds(var_lower.data(), 2); + view.set_variable_upper_bounds(var_upper.data(), 2); + view.set_is_device_memory(true); // Mark as GPU data + + // Enable remote solve + enable_remote_solve(); + + // Verify remote solve is enabled + ASSERT_TRUE(is_remote_solve_enabled()); + + // Call solve_lp with GPU view + remote solve enabled + // This 
should trigger the GPU->CPU copy path and return the "not implemented" error + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + // Since remote solve is not yet implemented, we expect an error status + // The key thing is that we didn't crash and the GPU->CPU copy worked + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::NumericalError); +} + +// Control test: GPU data + local solve (no remote) +TEST_F(RemoteSolveTest, gpu_data_with_local_solve) +{ + const raft::handle_t handle_{}; + + // Same problem setup as above + rmm::device_uvector A_values(4, handle_.get_stream()); + rmm::device_uvector A_indices(4, handle_.get_stream()); + rmm::device_uvector A_offsets(3, handle_.get_stream()); + rmm::device_uvector constraint_lower(2, handle_.get_stream()); + rmm::device_uvector constraint_upper(2, handle_.get_stream()); + rmm::device_uvector objective_coeffs(2, handle_.get_stream()); + rmm::device_uvector var_lower(2, handle_.get_stream()); + rmm::device_uvector var_upper(2, handle_.get_stream()); + + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_constraint_lower = {-1e20, -1e20}; // -inf (no lower bound) + std::vector h_constraint_upper = {5.4, 4.9}; // <= constraints + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + raft::copy(A_values.data(), h_A_values.data(), 4, handle_.get_stream()); + raft::copy(A_indices.data(), h_A_indices.data(), 4, handle_.get_stream()); + raft::copy(A_offsets.data(), h_A_offsets.data(), 3, handle_.get_stream()); + raft::copy(constraint_lower.data(), h_constraint_lower.data(), 2, handle_.get_stream()); + raft::copy(constraint_upper.data(), h_constraint_upper.data(), 2, handle_.get_stream()); + raft::copy(objective_coeffs.data(), h_obj.data(), 2, handle_.get_stream()); + raft::copy(var_lower.data(), 
h_var_lower.data(), 2, handle_.get_stream()); + raft::copy(var_upper.data(), h_var_upper.data(), 2, handle_.get_stream()); + handle_.sync_stream(); + + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix(A_values.data(), 4, A_indices.data(), 4, A_offsets.data(), 3); + view.set_constraint_lower_bounds(constraint_lower.data(), 2); + view.set_constraint_upper_bounds(constraint_upper.data(), 2); + view.set_objective_coefficients(objective_coeffs.data(), 2); + view.set_variable_lower_bounds(var_lower.data(), 2); + view.set_variable_upper_bounds(var_upper.data(), 2); + view.set_is_device_memory(true); + + // Disable remote solve + disable_remote_solve(); + + // Verify remote solve is disabled + ASSERT_FALSE(is_remote_solve_enabled()); + + // Call solve_lp - should solve locally + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + // Should succeed with optimal status + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::Optimal); +} + +// Test: CPU data + remote solve enabled +TEST_F(RemoteSolveTest, cpu_data_with_remote_solve_enabled) +{ + const raft::handle_t handle_{}; + + // Host data (CPU) + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_bounds = {5.4, 4.9}; + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + // Create view pointing to CPU data + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix( + h_A_values.data(), 4, h_A_indices.data(), 4, h_A_offsets.data(), 3); + view.set_constraint_bounds(h_bounds.data(), 2); + view.set_objective_coefficients(h_obj.data(), 2); + view.set_variable_lower_bounds(h_var_lower.data(), 2); + view.set_variable_upper_bounds(h_var_upper.data(), 2); + view.set_is_device_memory(false); // CPU data + + // Enable remote solve + enable_remote_solve(); + 
ASSERT_TRUE(is_remote_solve_enabled()); + + // Should go to remote path (and return not implemented error) + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::NumericalError); +} + +// Test: CPU data + local solve +TEST_F(RemoteSolveTest, cpu_data_with_local_solve) +{ + const raft::handle_t handle_{}; + + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_constraint_lower = {-1e20, -1e20}; // -inf (no lower bound) + std::vector h_constraint_upper = {5.4, 4.9}; // <= constraints + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix( + h_A_values.data(), 4, h_A_indices.data(), 4, h_A_offsets.data(), 3); + view.set_constraint_lower_bounds(h_constraint_lower.data(), 2); + view.set_constraint_upper_bounds(h_constraint_upper.data(), 2); + view.set_objective_coefficients(h_obj.data(), 2); + view.set_variable_lower_bounds(h_var_lower.data(), 2); + view.set_variable_upper_bounds(h_var_upper.data(), 2); + view.set_is_device_memory(false); + + disable_remote_solve(); + ASSERT_FALSE(is_remote_solve_enabled()); + + // Should copy to GPU and solve locally + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::Optimal); +} + +} // namespace cuopt::linear_programming diff --git a/dependencies.yaml b/dependencies.yaml index 09faffe52..6593e695c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -137,6 +137,7 @@ files: - depends_on_librmm - depends_on_rapids_logger - depends_on_mps_parser + - depends_on_protobuf py_run_libcuopt: output: pyproject pyproject_dir: python/libcuopt @@ -302,6 +303,7 @@ dependencies: - 
tbb-devel - zlib - bzip2 + - libprotobuf test_cpp: common: - output_types: [conda] @@ -504,6 +506,11 @@ dependencies: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + depends_on_protobuf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - protobuf depends_on_libraft_headers: common: - output_types: conda diff --git a/dev_scripts/build_c_test.sh b/dev_scripts/build_c_test.sh new file mode 100755 index 000000000..a1558536b --- /dev/null +++ b/dev_scripts/build_c_test.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Build the C API test program +# +# Usage: +# ./build_c_test.sh +# +# Prerequisites: +# - Activate conda environment with cuopt installed +# - CONDA_PREFIX must be set + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +if [ -z "$CONDA_PREFIX" ]; then + echo "Error: CONDA_PREFIX not set. Activate a conda environment first." + exit 1 +fi + +echo "Building test_c_api..." +gcc -I "$CONDA_PREFIX/include" \ + -L "$CONDA_PREFIX/lib" \ + -Wl,-rpath,"$CONDA_PREFIX/lib" \ + -o "$SCRIPT_DIR/test_c_api" \ + "$SCRIPT_DIR/test_c_api.c" \ + -lcuopt + +echo "Built: $SCRIPT_DIR/test_c_api" +echo "" +echo "Usage:" +echo " # Local solve:" +echo " $SCRIPT_DIR/test_c_api /path/to/problem.mps" +echo "" +echo " # Remote solve:" +echo " CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 $SCRIPT_DIR/test_c_api /path/to/problem.mps" diff --git a/dev_scripts/test_c_api.c b/dev_scripts/test_c_api.c new file mode 100644 index 000000000..ae800b9cd --- /dev/null +++ b/dev_scripts/test_c_api.c @@ -0,0 +1,179 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * Test script for cuOpt C API - works with both local and remote solve. 
+ * + * Usage: + * # Local solve (default): + * ./test_c_api /path/to/problem.mps + * + * # Remote solve (set environment variables first): + * CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 ./test_c_api /path/to/problem.mps + * + * Build: + * gcc -I $CONDA_PREFIX/include -L $CONDA_PREFIX/lib -Wl,-rpath,$CONDA_PREFIX/lib \ + * -o test_c_api test_c_api.c -lcuopt + * + * Example: + * ./test_c_api /home/tmckay/repos/HiGHS/check/instances/afiro.mps + */ + +#include +#include +#include + +const char* termination_status_to_string(cuopt_int_t termination_status) +{ + switch (termination_status) { + case CUOPT_TERIMINATION_STATUS_OPTIMAL: + return "Optimal"; + case CUOPT_TERIMINATION_STATUS_INFEASIBLE: + return "Infeasible"; + case CUOPT_TERIMINATION_STATUS_UNBOUNDED: + return "Unbounded"; + case CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT: + return "Iteration limit"; + case CUOPT_TERIMINATION_STATUS_TIME_LIMIT: + return "Time limit"; + case CUOPT_TERIMINATION_STATUS_NUMERICAL_ERROR: + return "Numerical error"; + case CUOPT_TERIMINATION_STATUS_PRIMAL_FEASIBLE: + return "Primal feasible"; + case CUOPT_TERIMINATION_STATUS_FEASIBLE_FOUND: + return "Feasible found"; + default: + return "Unknown"; + } +} + +cuopt_int_t solve_mps_file(const char* filename) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status; + cuopt_float_t time; + cuopt_int_t termination_status; + cuopt_float_t objective_value; + cuopt_int_t num_variables; + cuopt_float_t* solution_values = NULL; + + // Check for remote solve configuration + const char* remote_host = getenv("CUOPT_REMOTE_HOST"); + const char* remote_port = getenv("CUOPT_REMOTE_PORT"); + if (remote_host && remote_port) { + printf("Remote solve enabled: %s:%s\n", remote_host, remote_port); + } else { + printf("Local solve (no CUOPT_REMOTE_HOST/PORT set)\n"); + } + + printf("Reading MPS file: %s\n", filename); + + // Create the problem from MPS file + status = 
cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem from MPS file: %d\n", status); + goto DONE; + } + + // Get problem size + status = cuOptGetNumVariables(problem, &num_variables); + if (status != CUOPT_SUCCESS) { + printf("Error getting number of variables: %d\n", status); + goto DONE; + } + + // Create solver settings + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto DONE; + } + + // Set solver parameters + status = cuOptSetFloatParameter(settings, CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, 1e-6); + if (status != CUOPT_SUCCESS) { + printf("Error setting optimality tolerance: %d\n", status); + goto DONE; + } + + // Solve the problem + printf("Solving...\n"); + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem: %d\n", status); + goto DONE; + } + + // Get solution information + status = cuOptGetSolveTime(solution, &time); + if (status != CUOPT_SUCCESS) { + printf("Error getting solve time: %d\n", status); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status: %d\n", status); + goto DONE; + } + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto DONE; + } + + // Print results + printf("\nResults:\n"); + printf("----------------------------------------\n"); + printf("Number of variables: %d\n", num_variables); + printf("Status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Objective: %e\n", objective_value); + printf("Solve time: %.3f seconds\n", time); + + // Get and print solution variables + solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + status = 
cuOptGetPrimalSolution(solution, solution_values); + if (status != CUOPT_SUCCESS) { + printf("Error getting solution values: %d\n", status); + goto DONE; + } + + printf("\nPrimal solution (first 10 of %d variables):\n", num_variables); + for (cuopt_int_t i = 0; i < (num_variables < 10 ? num_variables : 10); i++) { + printf(" x%d = %f\n", i + 1, solution_values[i]); + } + if (num_variables > 10) { + printf(" ... (%d more variables)\n", num_variables - 10); + } + + printf("\nDone!\n"); + +DONE: + free(solution_values); + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + + return status; +} + +int main(int argc, char* argv[]) { + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + printf("\nSet CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT for remote solve.\n"); + return 1; + } + + cuopt_int_t status = solve_mps_file(argv[1]); + + if (status == CUOPT_SUCCESS) { + return 0; + } else { + printf("\nSolver failed with status: %d\n", status); + return 1; + } +} diff --git a/dev_scripts/test_pipe_cancel_restart.py b/dev_scripts/test_pipe_cancel_restart.py new file mode 100644 index 000000000..963d382d6 --- /dev/null +++ b/dev_scripts/test_pipe_cancel_restart.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test job cancellation with pipe mode and verify worker restart/pipe recreation. + +This test: +1. Submits a long-running LP job (with many iterations) +2. Cancels it while running +3. Verifies the cancellation succeeded +4. 
Submits a simple job and verifies it completes (tests pipe recreation) +""" + +import socket +import struct +import time + +# Import the generated protobuf module +import sys + +sys.path.insert(0, "/home/tmckay/repos/nvidia-cuopt/cpp/build") +import cuopt_remote_pb2 + +HOST = "localhost" +PORT = 8765 + + +def send_recv(sock, data): + """Send request and receive response with uint64 size prefix.""" + # Send size (uint64) + data + sock.sendall(struct.pack("= 1 + for j in range(10): + var_idx = (i + j) % n_vars + indices.append(var_idx) + values.append(1.0) + offsets.append(len(indices)) + + prob.A_offsets.extend(offsets) + prob.A_indices.extend(indices) + prob.A.extend(values) + + # Row types: all >= constraints + prob.row_types = bytes([ord("G")] * n_constraints) + + # Settings to make it run longer + lp.settings.log_to_console = True + lp.settings.time_limit = 60.0 # Allow up to 60 seconds + + return req.SerializeToString() + + +def create_simple_lp(): + """Create a simple LP that solves quickly.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.SUBMIT_JOB + req.blocking = False + + lp = req.lp_request + prob = lp.problem + prob.maximize = False + + # Simple: minimize x + 2y subject to x + y >= 1, x,y >= 0 + prob.c.extend([1.0, 2.0]) + prob.variable_lower_bounds.extend([0.0, 0.0]) + prob.variable_upper_bounds.extend([float("inf"), float("inf")]) + prob.constraint_lower_bounds.extend([1.0]) + prob.constraint_upper_bounds.extend([float("inf")]) + prob.A_offsets.extend([0, 2]) + prob.A_indices.extend([0, 1]) + prob.A.extend([1.0, 1.0]) + prob.row_types = bytes([ord("G")]) + lp.settings.log_to_console = True + + return req.SerializeToString() + + +def submit_job(sock, request_data): + """Submit a job and return the job_id.""" + response_data = send_recv(sock, request_data) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + if response.submit_response.status != cuopt_remote_pb2.SUCCESS: + raise 
Exception(f"Submit failed: {response.submit_response.message}") + + return ( + response.submit_response.job_id.decode() + if isinstance(response.submit_response.job_id, bytes) + else response.submit_response.job_id + ) + + +def check_status(sock, job_id): + """Check job status.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.CHECK_STATUS + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + return ( + response.status_response.job_status, + response.status_response.message, + ) + + +def cancel_job(sock, job_id): + """Cancel a job.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.CANCEL_JOB + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + success = response.cancel_response.status == cuopt_remote_pb2.SUCCESS + return success, response.cancel_response.message + + +def wait_for_result(sock, job_id): + """Wait for job to complete using WAIT_FOR_RESULT.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.WAIT_FOR_RESULT + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + return ( + response.result_response.status == cuopt_remote_pb2.SUCCESS, + response, + ) + + +def main(): + print("=" * 60) + print("Test: Job Cancellation with Pipe Mode + Worker Restart") + print("=" * 60) + + # Step 1: Submit a long-running job + print("\n[1] Submitting long-running LP job...") + sock1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock1.connect((HOST, PORT)) + + long_job_data = 
create_long_running_lp() + job_id = submit_job(sock1, long_job_data) + print(f" Job submitted: {job_id}") + sock1.close() + + # Step 2: Wait a bit for job to start processing + print("\n[2] Waiting for job to start processing...") + time.sleep(2) + + sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock2.connect((HOST, PORT)) + status, msg = check_status(sock2, job_id) + print(f" Status: {cuopt_remote_pb2.JobStatus.Name(status)} - {msg}") + sock2.close() + + # Step 3: Cancel the job + print("\n[3] Cancelling the job...") + sock3 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock3.connect((HOST, PORT)) + success, msg = cancel_job(sock3, job_id) + print(f" Cancel result: success={success}, message={msg}") + sock3.close() + + # Step 4: Verify job is cancelled + print("\n[4] Verifying job status after cancellation...") + time.sleep( + 1 + ) # Give time for worker to be killed and result to be recorded + + sock4 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock4.connect((HOST, PORT)) + status, msg = check_status(sock4, job_id) + print(f" Status: {cuopt_remote_pb2.JobStatus.Name(status)} - {msg}") + sock4.close() + + if status != cuopt_remote_pb2.CANCELLED: + print( + f" WARNING: Expected CANCELLED, got {cuopt_remote_pb2.JobStatus.Name(status)}" + ) + else: + print(" ✓ Job successfully cancelled!") + + # Step 5: Wait a bit for worker to restart with new pipes + print("\n[5] Waiting for worker to restart (with new pipes)...") + time.sleep(2) + + # Step 6: Submit a new simple job to test pipe recreation + print("\n[6] Submitting new simple LP job to test pipe recreation...") + sock5 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock5.connect((HOST, PORT)) + + simple_job_data = create_simple_lp() + job_id2 = submit_job(sock5, simple_job_data) + print(f" Job submitted: {job_id2}") + sock5.close() + + # Step 7: Wait for the new job to complete + print("\n[7] Waiting for new job to complete...") + sock6 = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) + sock6.connect((HOST, PORT)) + success, response = wait_for_result(sock6, job_id2) + sock6.close() + + if success: + print(" ✓ New job completed successfully!") + if response.result_response.HasField("lp_solution"): + sol = response.result_response.lp_solution + obj = sol.primal_objective + print(f" Objective value: {obj}") + if abs(obj - 1.0) < 0.01: + print(" ✓ Objective matches expected value (1.0)!") + else: + print(f" WARNING: Expected objective ~1.0, got {obj}") + else: + print(" ✗ New job failed!") + return 1 + + print("\n" + "=" * 60) + print("TEST PASSED: Cancellation and pipe recreation work correctly!") + print("=" * 60) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/dev_scripts/test_python_api.py b/dev_scripts/test_python_api.py new file mode 100755 index 000000000..b01fa506c --- /dev/null +++ b/dev_scripts/test_python_api.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Test script for cuOpt Python API - works with both local and remote solve. 
+ +Usage: + # Local solve (default): + python test_python_api.py /path/to/problem.mps + + # Remote solve (set environment variables first): + CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 python test_python_api.py /path/to/problem.mps + +Example: + python test_python_api.py /home/tmckay/repos/HiGHS/check/instances/afiro.mps +""" + +import os +import sys + +import cuopt_mps_parser +from cuopt.linear_programming import solver, solver_settings + + +def main(): + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + mps_file = sys.argv[1] + if not os.path.exists(mps_file): + print(f"Error: File not found: {mps_file}") + sys.exit(1) + + # Check if remote solve is configured + remote_host = os.environ.get("CUOPT_REMOTE_HOST") + remote_port = os.environ.get("CUOPT_REMOTE_PORT") + if remote_host and remote_port: + print(f"Remote solve enabled: {remote_host}:{remote_port}") + else: + print("Local solve (no CUOPT_REMOTE_HOST/PORT set)") + + print(f"Reading MPS file: {mps_file}") + + # Parse MPS file into DataModel + data_model = cuopt_mps_parser.ParseMps(mps_file) + + # Create solver settings + settings = solver_settings.SolverSettings() + settings.set_optimality_tolerance(1e-6) + + # Solve + print("Solving...") + solution = solver.Solve(data_model, settings) + + # Print results + print("\nResults:") + print("-" * 40) + print(f"Status: {solution.get_termination_reason()}") + print(f"Objective: {solution.get_primal_objective():.6e}") + print(f"Solve time: {solution.get_solve_time():.3f} seconds") + + # Print first few solution values + primal = solution.get_primal_solution() + if len(primal) > 0: + print(f"\nPrimal solution (first 10 of {len(primal)} variables):") + for i, val in enumerate(primal[:10]): + print(f" x{i+1} = {val:.6f}") + if len(primal) > 10: + print(f" ... 
({len(primal) - 10} more variables)") + + print("\nDone!") + + +if __name__ == "__main__": + main() diff --git a/dev_scripts/test_python_problem_remote.py b/dev_scripts/test_python_problem_remote.py new file mode 100644 index 000000000..63c766329 --- /dev/null +++ b/dev_scripts/test_python_problem_remote.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test Python Problem class with remote solve. + +This test uses the high-level Python Problem class (not raw protobuf) and +verifies it works with both local and remote solve. + +Usage: + # Test local solve (requires GPU) + python test_python_problem_remote.py local + + # Test remote solve (requires cuopt_remote_server running) + CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 python test_python_problem_remote.py remote +""" + +import os +import sys + + +def test_problem_class(): + """Test the Python Problem class API.""" + from cuopt import linear_programming + from cuopt.linear_programming.problem import Problem, MINIMIZE + + # Check if remote solve is configured + remote_host = os.environ.get("CUOPT_REMOTE_HOST", "") + remote_port = os.environ.get("CUOPT_REMOTE_PORT", "") + is_remote = bool(remote_host and remote_port) + + mode = "REMOTE" if is_remote else "LOCAL" + print(f"=== Testing Python Problem class ({mode} solve) ===") + if is_remote: + print(f" Server: {remote_host}:{remote_port}") + + # Create a simple LP: + # minimize: x + 2y + # subject to: + # x + y >= 1 + # x, y >= 0 + + problem = Problem("SimpleLP") + + # Add variables + x = problem.addVariable(name="x", lb=0.0) + y = problem.addVariable(name="y", lb=0.0) + + # Add constraint: x + y >= 1 + problem.addConstraint(x + y >= 1, name="c1") + + # Set objective: minimize x + 2y + problem.setObjective(x + 2 * y, sense=MINIMIZE) + + print(f" Variables: {problem.NumVariables}") + print(f" Constraints: {problem.NumConstraints}") + 
+ # Solve + settings = linear_programming.SolverSettings() + settings.log_to_console = True + + print(" Solving...") + problem.solve(settings) + + # Check results + print(f" Status: {problem.Status}") + print(f" Objective value: {problem.ObjValue}") + print(f" Solve time: {problem.SolveTime:.4f}s") + print(f" x = {x.Value}") + print(f" y = {y.Value}") + + # Verify solution + # Optimal solution should be x=1, y=0 with objective=1 + # (since y has coefficient 2 and we minimize) + expected_obj = 1.0 + tolerance = 0.01 + + if abs(problem.ObjValue - expected_obj) < tolerance: + print( + f"\n=== SUCCESS: Objective {problem.ObjValue:.4f} matches expected {expected_obj} ===" + ) + return True + else: + print( + f"\n=== FAILED: Objective {problem.ObjValue:.4f} != expected {expected_obj} ===" + ) + return False + + +def test_mip_problem(): + """Test a MIP problem.""" + from cuopt import linear_programming + from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE + + remote_host = os.environ.get("CUOPT_REMOTE_HOST", "") + remote_port = os.environ.get("CUOPT_REMOTE_PORT", "") + is_remote = bool(remote_host and remote_port) + mode = "REMOTE" if is_remote else "LOCAL" + + print(f"\n=== Testing Python MIP Problem class ({mode} solve) ===") + + # Simple MIP: + # minimize: x + y + # subject to: + # x + y >= 2.5 + # x, y >= 0, x integer + + problem = Problem("SimpleMIP") + + # Add variables + x = problem.addVariable(name="x", lb=0.0, vtype=INTEGER) + y = problem.addVariable(name="y", lb=0.0) + + # Add constraint + problem.addConstraint(x + y >= 2.5, name="c1") + + # Set objective + problem.setObjective(x + y, sense=MINIMIZE) + + print(f" Variables: {problem.NumVariables} (1 integer)") + print(f" Constraints: {problem.NumConstraints}") + + # Solve with MIP settings + settings = linear_programming.SolverSettings() + settings.log_to_console = True + settings.time_limit = 60.0 + + print(" Solving...") + problem.solve(settings) + + print(f" Status: {problem.Status}") + 
print(f" Objective value: {problem.ObjValue}") + print(f" x = {x.Value} (integer)") + print(f" y = {y.Value}") + + # Since x must be integer, optimal is x=1 or x=2 + y to make sum >= 2.5 + # x=1, y=1.5 gives obj=2.5 + # x=2, y=0.5 gives obj=2.5 + # x=3, y=0 gives obj=3 + # So optimal is 2.5 + expected_obj = 2.5 + tolerance = 0.1 + + if abs(problem.ObjValue - expected_obj) < tolerance: + print( + f"\n=== SUCCESS: MIP objective {problem.ObjValue:.4f} matches expected {expected_obj} ===" + ) + return True + else: + print( + f"\n=== FAILED: MIP objective {problem.ObjValue:.4f} != expected {expected_obj} ===" + ) + return False + + +if __name__ == "__main__": + # Show usage hint + if len(sys.argv) > 1 and sys.argv[1] in ("--help", "-h"): + print(__doc__) + sys.exit(0) + + # Run tests + try: + lp_ok = test_problem_class() + mip_ok = test_mip_problem() + + if lp_ok and mip_ok: + print("\n=== All tests PASSED ===") + sys.exit(0) + else: + print("\n=== Some tests FAILED ===") + sys.exit(1) + except Exception as e: + print(f"\n=== ERROR: {e} ===") + import traceback + + traceback.print_exc() + sys.exit(1) diff --git a/docs/developer/REMOTE_SOLVE_GUIDE.md b/docs/developer/REMOTE_SOLVE_GUIDE.md new file mode 100644 index 000000000..eec753e91 --- /dev/null +++ b/docs/developer/REMOTE_SOLVE_GUIDE.md @@ -0,0 +1,198 @@ +# cuOpt Remote Solve Guide + +This guide covers the remote solve feature for cuOpt, enabling GPU-accelerated optimization from CPU-only client machines. 
+ +## Overview + +The remote solve feature allows: +- **CPU-only clients** to solve LP/MIP problems using a GPU-equipped server +- **Async job management** for non-blocking operations +- **Pluggable serialization** (default: Protocol Buffers, also supports MsgPack) +- **Real-time log streaming** from solver to client +- **Worker process isolation** with automatic restart on failure + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ CLIENT │ +│ - Submits job → receives job ID │ +│ - Polls for status (QUEUED/PROCESSING/COMPLETED/FAILED) │ +│ - Retrieves logs incrementally via GET_LOGS │ +│ - Retrieves result → gets solution │ +│ - Deletes result → frees server memory │ +└────────────────────┬────────────────────────────────────────┘ + │ TCP/IP (Protobuf or custom serializer) + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ ASYNC SERVER (Main Process) │ +│ - Accepts connections (default port 8765) │ +│ - Handles: SUBMIT_JOB, CHECK_STATUS, GET_RESULT, │ +│ DELETE_RESULT, GET_LOGS │ +│ - Shared memory job queue │ +│ - Worker monitor thread (restarts dead workers) │ +│ - Result retrieval thread │ +└──────┬──────────────────────────────────┬──────────────────┘ + │ Shared Memory │ + ▼ ▼ +┌─────────────────────────────────────────────────────────────┐ +│ WORKER PROCESS(es) │ +│ - Reads jobs from shared memory queue │ +│ - Logs to per-job file (/tmp/cuopt_logs/log_{job_id}) │ +│ - Solves using GPU (cuOpt library) │ +│ - Writes results to shared memory result queue │ +│ - Isolated - crash doesn't affect server │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Quick Start + +### Building + +```bash +# Build with remote server support +./build.sh libcuopt cuopt_remote_server +``` + +### Running the Server + +```bash +# Start server with 4 worker processes on port 8765 +./cpp/build/cuopt_remote_server -p 8765 -w 4 +``` + +Server options: +- `-p PORT` - Port number (default: 8765) +- `-w 
NUM` - Number of worker processes (default: 4) + +### Client Configuration + +Set environment variables to enable remote solve: + +```bash +export CUOPT_REMOTE_HOST=server.example.com +export CUOPT_REMOTE_PORT=8765 + +# Optional: Use sync mode (blocking with log streaming) +export CUOPT_REMOTE_USE_SYNC=1 +``` + +### Using cuopt_cli + +```bash +# Remote solve (async mode by default) +CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 \ + ./cpp/build/cuopt_cli problem.mps + +# Remote solve with log streaming (sync mode) +CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + ./cpp/build/cuopt_cli --log-to-console 1 problem.mps +``` + +### Python Usage + +```python +import os +os.environ['CUOPT_REMOTE_HOST'] = 'localhost' +os.environ['CUOPT_REMOTE_PORT'] = '8765' + +from cuopt.linear_programming import DataModel, SolverSettings, solve_lp + +# Create problem +dm = DataModel() +dm.set_csr_constraint_matrix(...) +# ... set up problem ... + +# Solve remotely (transparent to user) +solution = solve_lp(dm, SolverSettings()) +print(f"Objective: {solution.get_objective_value()}") +``` + +## Operating Modes + +### Sync Mode (`CUOPT_REMOTE_USE_SYNC=1`) +- Client sends request and waits for result +- Server streams solver logs in real-time +- Best for interactive use + +### Async Mode (default) +- Client submits job, receives job_id immediately +- Client polls for status and logs incrementally +- Client retrieves result when complete +- Best for batch processing, long-running jobs + +## Job Lifecycle (Async Mode) + +1. **SUBMIT_JOB** → Returns `job_id` +2. **CHECK_STATUS** → Returns QUEUED | PROCESSING | COMPLETED | FAILED +3. **GET_LOGS** → Returns log lines from `frombyte` offset +4. **GET_RESULT** → Returns serialized solution +5. **DELETE_RESULT** → Removes job from server + +## Custom Serialization + +The default serializer uses Protocol Buffers. 
You can provide a custom serializer: + +```bash +# Set custom serializer library +export CUOPT_SERIALIZER_LIB=/path/to/libcustom_serializer.so + +# Run server and client with same serializer +CUOPT_SERIALIZER_LIB=... ./cpp/build/cuopt_remote_server -p 8765 -w 2 +CUOPT_SERIALIZER_LIB=... ./cpp/build/cuopt_cli problem.mps +``` + +See `docs/developer/SERIALIZATION_PLUGIN_GUIDE.md` for implementation details. + +## Worker Monitoring + +The server automatically monitors worker processes: +- Detects worker death via `waitpid` +- Automatically restarts dead workers +- Marks in-progress jobs as FAILED if worker dies +- Logs worker lifecycle events + +## Files and Components + +| Component | Location | +|-----------|----------| +| Server executable | `cpp/cuopt_remote_server.cpp` | +| Client logic | `cpp/src/linear_programming/utilities/remote_solve.cu` | +| Serialization interface | `cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp` | +| Protobuf serializer | `cpp/src/linear_programming/utilities/protobuf_serializer.cu` | +| Protobuf schema | `cpp/src/linear_programming/utilities/cuopt_remote.proto` | +| MsgPack serializer (example) | `cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp` | + +## Troubleshooting + +### Library Loading Issues (Development) + +If you're developing and the wrong `libcuopt.so` is being loaded: + +```bash +# Use LD_PRELOAD to force loading local build +LD_PRELOAD=cpp/build/libcuopt.so ./cpp/build/cuopt_cli problem.mps +``` + +For production, use `build.sh` which sets up proper RPATH. + +### Server Not Responding + +1. Check server is running: `pgrep -af cuopt_remote_server` +2. Check port is listening: `ss -tlnp | grep 8765` +3. 
Check firewall allows connections + +### Job Stuck in PROCESSING + +- Worker may have crashed - check server logs +- Server will mark job as FAILED after detecting worker death + +## Environment Variables Reference + +| Variable | Description | Default | +|----------|-------------|---------| +| `CUOPT_REMOTE_HOST` | Server hostname/IP | (none - local solve) | +| `CUOPT_REMOTE_PORT` | Server port | (none - local solve) | +| `CUOPT_REMOTE_USE_SYNC` | Use sync mode if "1" | "0" (async) | +| `CUOPT_SERIALIZER_LIB` | Path to custom serializer | (uses protobuf) | diff --git a/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md new file mode 100644 index 000000000..605f73d50 --- /dev/null +++ b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md @@ -0,0 +1,595 @@ +# cuOpt Remote Solve Serialization Plugin Guide + +This guide explains how to develop custom serialization plugins for cuOpt's remote solve feature. Plugins allow you to replace the default Protocol Buffers serialization with alternative formats like MsgPack, JSON, FlatBuffers, or custom binary protocols. + +## Overview + +The remote solve feature uses a pluggable serialization interface (`remote_serializer_t`) that handles: +- Serializing optimization problems (LP/MIP) for network transmission +- Deserializing solver settings +- Serializing solutions back to the client +- Message type identification (LP vs MIP) + +``` +┌─────────────┐ ┌─────────────┐ +│ Client │ │ Server │ +│ │ serialize_lp_request() │ │ +│ Problem ───┼──────────────────────────────┼──► Problem │ +│ │ │ │ +│ │ serialize_lp_solution() │ │ +│ Solution ◄─┼──────────────────────────────┼─── Solution │ +└─────────────┘ └─────────────┘ + ▲ ▲ + │ │ + └────────── Same Serializer ───────────────┘ +``` + +**Important**: Both client and server must use the same serializer for communication to work. 
+ +## The Serializer Interface + +Your plugin must implement the `remote_serializer_t` interface defined in: +`cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp` + +### Required Methods + +```cpp +template +class remote_serializer_t { +public: + virtual ~remote_serializer_t() = default; + + // ═══════════════════════════════════════════════════════════════════ + // CLIENT-SIDE: Serialize requests, deserialize solutions + // ═══════════════════════════════════════════════════════════════════ + + // Serialize an LP problem and settings into bytes for transmission + virtual std::vector serialize_lp_request( + const mps_parser::data_model_view_t& problem, + const pdlp_solver_settings_t& settings) = 0; + + // Serialize a MIP problem and settings into bytes + virtual std::vector serialize_mip_request( + const mps_parser::data_model_view_t& problem, + const mip_solver_settings_t& settings) = 0; + + // Deserialize an LP solution from bytes received from server + virtual optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) = 0; + + // Deserialize a MIP solution from bytes + virtual mip_solution_t deserialize_mip_solution( + const std::vector& data) = 0; + + // ═══════════════════════════════════════════════════════════════════ + // SERVER-SIDE: Deserialize requests, serialize solutions + // ═══════════════════════════════════════════════════════════════════ + + // Check if the received data is a MIP request (vs LP) + virtual bool is_mip_request(const std::vector& data) = 0; + + // Deserialize LP request into problem data and settings + virtual bool deserialize_lp_request( + const std::vector& data, + mps_parser::mps_data_model_t& problem_data, + pdlp_solver_settings_t& settings) = 0; + + // Deserialize MIP request into problem data and settings + virtual bool deserialize_mip_request( + const std::vector& data, + mps_parser::mps_data_model_t& problem_data, + mip_solver_settings_t& settings) = 0; + + // Serialize LP 
solution for transmission back to client + virtual std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) = 0; + + // Serialize MIP solution + virtual std::vector serialize_mip_solution( + const mip_solution_t& solution) = 0; + + // ═══════════════════════════════════════════════════════════════════ + // METADATA + // ═══════════════════════════════════════════════════════════════════ + + // Human-readable format name (e.g., "msgpack", "json", "flatbuffers") + virtual std::string format_name() const = 0; + + // Protocol version for compatibility checking + virtual uint32_t protocol_version() const = 0; +}; +``` + +### Factory Function + +Your plugin must export a factory function that creates the serializer: + +```cpp +extern "C" { + // For int32_t indices, double floats (most common) + std::unique_ptr> + create_cuopt_serializer_i32_f64(); + + // Additional type combinations if needed + std::unique_ptr> + create_cuopt_serializer_i32_f32(); +} +``` + +## Step-by-Step Implementation + +### Step 1: Create the Plugin Source File + +Create `cpp/src/linear_programming/utilities/serializers/my_serializer.cpp`: + +```cpp +#include +#include +#include + +namespace cuopt::linear_programming { + +// Message type identifiers (first byte of each message) +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; + +template +class my_serializer_t : public remote_serializer_t { +public: + my_serializer_t() = default; + ~my_serializer_t() override = default; + + std::string format_name() const override { return "my_format"; } + uint32_t protocol_version() const override { return 1; } + + //======================================================================== + // CLIENT-SIDE METHODS + //======================================================================== + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const 
pdlp_solver_settings_t& settings) override + { + std::vector buffer; + + // Start with message type + buffer.push_back(MSG_LP_REQUEST); + + // Serialize problem dimensions + i_t n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? view.get_constraint_matrix_offsets().size() - 1 : 0; + i_t n_cols = view.get_objective_coefficients().size(); + i_t nnz = view.get_constraint_matrix_values().size(); + + // ... serialize all problem data ... + // See msgpack_serializer.cpp for complete example + + return buffer; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + std::vector buffer; + buffer.push_back(MSG_MIP_REQUEST); + // ... similar to LP but with MIP settings ... + return buffer; + } + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + // Parse the solution data + // Create and return solution object + + // On error, return error solution: + // return optimization_problem_solution_t( + // cuopt::logic_error("Parse error", cuopt::error_type_t::RuntimeError)); + } + + mip_solution_t deserialize_mip_solution( + const std::vector& data) override + { + // Similar to LP solution + } + + //======================================================================== + // SERVER-SIDE METHODS + //======================================================================== + + bool is_mip_request(const std::vector& data) override + { + if (data.empty()) return false; + return data[0] == MSG_MIP_REQUEST; + } + + bool deserialize_lp_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + try { + // Parse message type + if (data.empty() || data[0] != MSG_LP_REQUEST) return false; + + // Parse problem data and populate mps_data: + // mps_data.set_problem_name("..."); + // mps_data.set_objective_coefficients(coeffs.data(), coeffs.size()); + // 
mps_data.set_csr_constraint_matrix(...); + // mps_data.set_variable_bounds(...); + // mps_data.set_constraint_bounds(...); + + // Parse settings: + // settings.time_limit = ...; + // settings.iteration_limit = ...; + + return true; + } catch (...) { + return false; + } + } + + bool deserialize_mip_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Similar to LP, also set variable types for integers/binaries: + // mps_data.set_variable_types(var_types); + return true; + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + std::vector buffer; + buffer.push_back(MSG_LP_SOLUTION); + + // NOTE: Server calls solution.to_host() before serialization, + // so solution data is always in CPU memory. Use: + // solution.get_primal_solution_host() + // solution.get_dual_solution_host() + // solution.get_reduced_cost_host() + + // Serialize termination status, objective, solution vectors, etc. 
+ + return buffer; + } + + std::vector serialize_mip_solution( + const mip_solution_t& solution) override + { + std::vector buffer; + buffer.push_back(MSG_MIP_SOLUTION); + + // Use solution.get_solution_host() for the solution vector + + return buffer; + } +}; + +//========================================================================== +// FACTORY FUNCTIONS - Must be exported with C linkage +//========================================================================== + +template +std::unique_ptr> create_serializer_impl() +{ + return std::make_unique>(); +} + +} // namespace cuopt::linear_programming + +// Export factory functions with C linkage for dlopen/dlsym +extern "C" { + +std::unique_ptr> +create_cuopt_serializer_i32_f64() +{ + return cuopt::linear_programming::create_serializer_impl(); +} + +std::unique_ptr> +create_cuopt_serializer_i32_f32() +{ + return cuopt::linear_programming::create_serializer_impl(); +} + +// Add more type combinations as needed + +} +``` + +### Step 2: Create CMakeLists.txt for the Plugin + +Create `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt`: + +```cmake +# Build the custom serializer as a shared library plugin +add_library(cuopt_my_serializer SHARED my_serializer.cpp) + +target_link_libraries(cuopt_my_serializer + PRIVATE + cuopt # Link against cuOpt for solution types +) + +target_include_directories(cuopt_my_serializer + PRIVATE + ${CMAKE_SOURCE_DIR}/include +) + +# Set RPATH so the plugin can find libcuopt.so +set_target_properties(cuopt_my_serializer PROPERTIES + INSTALL_RPATH "$ORIGIN" +) + +install(TARGETS cuopt_my_serializer + DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +``` + +### Step 3: Add to Parent CMakeLists.txt + +In `cpp/CMakeLists.txt`, add: + +```cmake +add_subdirectory(src/linear_programming/utilities/serializers) +``` + +### Step 4: Build the Plugin + +```bash +# Build everything including the plugin +./build.sh libcuopt cuopt_remote_server + +# Or just the plugin (after initial build) +cd 
cpp/build +ninja cuopt_my_serializer +``` + +## Using the Plugin + +### Environment Variable + +Set `CUOPT_SERIALIZER_LIB` to point to your plugin: + +```bash +export CUOPT_SERIALIZER_LIB=/path/to/libcuopt_my_serializer.so +``` + +### Running Server with Custom Serializer + +```bash +# Set the serializer library +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so + +# Start the server +cuopt_remote_server -p 8765 +``` + +Server output will show: +``` +[remote_solve] Loading custom serializer from: /path/to/libcuopt_my_serializer.so +[remote_solve] Using custom serializer: my_format +``` + +### Running Client with Custom Serializer + +```bash +# Same serializer must be used on client side +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so +export CUOPT_REMOTE_HOST=localhost +export CUOPT_REMOTE_PORT=8765 + +# Run cuopt_cli +cuopt_cli problem.mps + +# Or Python +python my_solver_script.py +``` + +### Complete Example Session + +```bash +# Terminal 1: Start server with msgpack serializer +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so +cuopt_remote_server -p 8765 + +# Terminal 2: Run client with same serializer +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so +export CUOPT_REMOTE_HOST=localhost +export CUOPT_REMOTE_PORT=8765 +cuopt_cli /path/to/problem.mps +``` + +## Data Model Reference + +### Problem Data (`data_model_view_t`) + +Key getters for serializing problem data: + +```cpp +// Problem metadata +view.get_problem_name() // std::string +view.get_objective_name() // std::string +view.get_sense() // bool (true = maximize) +view.get_objective_scaling_factor() // f_t +view.get_objective_offset() // f_t + +// Constraint matrix (CSR format) +view.get_constraint_matrix_values() // span +view.get_constraint_matrix_indices() // span +view.get_constraint_matrix_offsets() // span + +// Objective and bounds +view.get_objective_coefficients() // span 
+view.get_variable_lower_bounds() // span +view.get_variable_upper_bounds() // span +view.get_constraint_lower_bounds() // span +view.get_constraint_upper_bounds() // span + +// For MIP problems +view.get_variable_types() // span ('C', 'I', 'B') + +// Names (optional) +view.get_variable_names() // vector +view.get_row_names() // vector +``` + +### Problem Data (`mps_data_model_t`) - Server Side + +Key setters for deserializing: + +```cpp +mps_data.set_problem_name(name); +mps_data.set_objective_name(name); +mps_data.set_maximize(bool); +mps_data.set_objective_scaling_factor(factor); +mps_data.set_objective_offset(offset); + +mps_data.set_objective_coefficients(ptr, size); +mps_data.set_csr_constraint_matrix(values, nvals, indices, nidx, offsets, noff); +mps_data.set_variable_bounds(lower, upper, size); +mps_data.set_constraint_bounds(lower, upper, size); + +// For MIP +mps_data.set_variable_types(std::vector); +``` + +### LP Solution (`optimization_problem_solution_t`) + +```cpp +// Getters (for serialization) +solution.get_termination_status() // pdlp_termination_status_t +solution.get_objective_value() // f_t +solution.get_primal_solution_host() // vector& +solution.get_dual_solution_host() // vector& +solution.get_reduced_cost_host() // vector& +solution.get_solve_time() // double +solution.get_l2_primal_residual() // f_t +solution.get_l2_dual_residual() // f_t +solution.get_gap() // f_t +solution.get_nb_iterations() // i_t + +// Setters (for deserialization on client) +solution.set_termination_status(status); +solution.set_objective_value(value); +solution.set_primal_solution_host(vector); +solution.set_dual_solution_host(vector); +solution.set_reduced_cost_host(vector); +solution.set_solve_time(time); +// ... 
etc +``` + +### MIP Solution (`mip_solution_t`) + +```cpp +// Getters +solution.get_termination_status() // mip_termination_status_t +solution.get_objective_value() // f_t +solution.get_solution_host() // vector& +solution.get_total_solve_time() // double +solution.get_mip_gap() // f_t + +// Setters +solution.set_solution_host(vector); +solution.set_objective_value(value); +solution.set_mip_gap(gap); +// ... etc +``` + +## Tips and Best Practices + +### 1. Message Type Identification + +Always include a message type identifier as the first byte(s): + +```cpp +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; +``` + +### 2. Version Compatibility + +Include a protocol version in your messages for future compatibility: + +```cpp +// In serialize: +buffer.push_back(MSG_LP_REQUEST); +buffer.push_back(PROTOCOL_VERSION); + +// In deserialize: +uint8_t version = data[1]; +if (version != PROTOCOL_VERSION) { + // Handle version mismatch +} +``` + +### 3. Error Handling + +Return proper error solutions on parse failures: + +```cpp +optimization_problem_solution_t deserialize_lp_solution(...) { + try { + // Parse... + } catch (const std::exception& e) { + return optimization_problem_solution_t( + cuopt::logic_error( + std::string("Deserialize error: ") + e.what(), + cuopt::error_type_t::RuntimeError)); + } +} +``` + +### 4. Solution Memory + +The server calls `solution.to_host()` before serialization, so: +- Always use `get_*_host()` methods for solution data +- No need to handle GPU memory in your serializer + +### 5. 
Testing + +Test your serializer with both LP and MIP problems: + +```bash +# LP test +cuopt_cli /path/to/lp_problem.mps + +# MIP test (use a problem with integer variables) +cuopt_cli /path/to/mip_problem.mps +``` + +## Reference Implementation + +See the MsgPack serializer for a complete working example: +- `cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp` +- `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt` + +## Troubleshooting + +### "Failed to load serializer library" + +- Check the path in `CUOPT_SERIALIZER_LIB` is correct +- Ensure the library was built: `ls $CONDA_PREFIX/lib/libcuopt_*serializer.so` + +### "Factory function not found" + +- Ensure factory functions are exported with `extern "C"` +- Check function names match: `create_cuopt_serializer_i32_f64`, etc. + +### "Read failed" / Malformed messages + +- Ensure client and server use the **same** serializer +- Check message framing is consistent +- Verify all required fields are serialized + +### Symbol errors at runtime + +- Rebuild and reinstall with `./build.sh libcuopt cuopt_remote_server` +- Ensure plugin links against `cuopt` diff --git a/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md new file mode 100644 index 000000000..33553ef5c --- /dev/null +++ b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md @@ -0,0 +1,236 @@ +# cuOpt Solution Memory Architecture + +This document describes how cuOpt manages solution data memory for both local GPU-based solving and remote CPU-only solving. + +## Overview + +cuOpt solutions can exist in either GPU memory (for local high-performance workflows) or CPU memory (for remote solve and CPU-only clients). The architecture supports both use cases efficiently. 
+ +## Solution Classes + +### LP Solution: `optimization_problem_solution_t` + +Located in: `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp` + +**Key Data Members:** +```cpp +// GPU memory (primary storage for local solve) +rmm::device_uvector primal_solution_; +rmm::device_uvector dual_solution_; +rmm::device_uvector reduced_cost_; + +// CPU memory (used for remote solve or explicit host access) +std::vector primal_solution_host_; +std::vector dual_solution_host_; +std::vector reduced_cost_host_; + +// Scalars (always on host) +f_t objective_value_; +f_t dual_objective_value_; +f_t l2_primal_residual_; +f_t l2_dual_residual_; +f_t gap_; +i_t nb_iterations_; +f_t solve_time_; +pdlp_termination_status_t termination_status_; +error_type_t error_status_; +``` + +### MIP Solution: `mip_solution_t` + +Located in: `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp` + +**Key Data Members:** +```cpp +// GPU memory (primary storage for local solve) +rmm::device_uvector solution_; +std::vector> solution_pool_; + +// CPU memory (used for remote solve) +std::vector solution_host_; +std::vector> solution_pool_host_; + +// Scalars (always on host) +f_t objective_; +f_t mip_gap_; +f_t max_constraint_violation_; +f_t max_int_violation_; +f_t max_variable_bound_violation_; +mip_termination_status_t termination_status_; +error_type_t error_status_; +``` + +## Memory Management Strategy + +### Local Solve (GPU) + +When solving locally on a GPU: + +1. **Solver computes** → Results in GPU memory (`device_uvector`) +2. **Solution returned** → Contains GPU buffers +3. **User accesses** → Can work directly with GPU data or copy to host as needed + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Solver │ ──► │ Solution │ ──► │ User │ +│ (GPU) │ │ (GPU mem) │ │ (GPU/CPU) │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +### Remote Solve (CPU-only client) + +When solving remotely from a CPU-only machine: + +1. 
**Client sends** → Problem data serialized and sent to server +2. **Server solves** → Results computed on GPU +3. **`to_host()` called** → GPU data copied to CPU memory +4. **Solution serialized** → CPU data sent back to client +5. **Client receives** → Solution with CPU memory only + +``` +┌──────────┐ ┌──────────────────────────────────────────┐ ┌──────────┐ +│ Client │ ──► │ SERVER │ ◄── │ Client │ +│ (no GPU) │ │ GPU solve → to_host() → serialize │ │(solution)│ +└──────────┘ └──────────────────────────────────────────┘ └──────────┘ +``` + +## The `to_host()` Method + +Both solution classes provide a `to_host()` method that copies GPU data to CPU: + +```cpp +// LP Solution +void optimization_problem_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (primal_solution_.size() > 0) { + primal_solution_host_.resize(primal_solution_.size()); + raft::copy(primal_solution_host_.data(), primal_solution_.data(), + primal_solution_.size(), stream_view); + } + // ... similar for dual_solution_, reduced_cost_ + stream_view.synchronize(); +} + +// MIP Solution +void mip_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (solution_.size() > 0) { + solution_host_.resize(solution_.size()); + raft::copy(solution_host_.data(), solution_.data(), + solution_.size(), stream_view); + } + // ... similar for solution_pool_ + stream_view.synchronize(); +} +``` + +### When to Call `to_host()` + +- **Server-side remote solve**: Called before serializing solution for network transmission +- **Client accessing host data**: If user needs `std::vector` access to solution data +- **Writing to files**: When saving solutions to disk + +### Performance Considerations + +The `to_host()` copy adds overhead, but: +- Only called when CPU access is actually needed +- GPU computation dominates solve time for non-trivial problems +- One-time cost after solve completes + +**Typical overhead**: Negligible for problems with thousands of variables. 
For a 10,000-variable problem, copying ~80KB takes <1ms.

## Accessor Methods

### GPU Accessors (for local solve)

```cpp
// LP
const rmm::device_uvector<f_t>& get_primal_solution() const;
const rmm::device_uvector<f_t>& get_dual_solution() const;
const rmm::device_uvector<f_t>& get_reduced_cost() const;

// MIP
const rmm::device_uvector<f_t>& get_solution() const;
```

### CPU Accessors (for remote solve)

```cpp
// LP
const std::vector<f_t>& get_primal_solution_host() const;
const std::vector<f_t>& get_dual_solution_host() const;
const std::vector<f_t>& get_reduced_cost_host() const;

// MIP
const std::vector<f_t>& get_solution_host() const;
```

### Checking Memory Location

```cpp
// Returns true if solution data is on GPU
bool is_device_memory() const;
```

## Usage in Remote Solve Server

The server calls `to_host()` before serialization:

```cpp
// In cuopt_remote_server.cpp
if (is_mip) {
  mip_solution_t<i_t, f_t> solution = solve_mip(...);
  solution.to_host(stream);  // Copy GPU → CPU
  result_data = serializer->serialize_mip_solution(solution);
} else {
  optimization_problem_solution_t<i_t, f_t> solution = solve_lp(...);
  solution.to_host(stream);  // Copy GPU → CPU
  result_data = serializer->serialize_lp_solution(solution);
}
```

## Design Rationale

### Why Not Pure CPU Memory?

An earlier design considered using only `std::vector` for solutions. We chose the hybrid approach because:

1. **GPU performance**: Local solves benefit from keeping data on GPU
2. **Minimize changes**: Existing GPU-based code continues to work unchanged
3. **Flexibility**: Users can choose GPU or CPU access as needed

### Why Not Pure GPU Memory?

Pure GPU memory would fail for:

1. **Remote solve**: CPU-only clients need CPU data
2. **Serialization**: Network transmission requires CPU memory
3. 
**File I/O**: Writing to disk typically uses CPU + +### Hybrid Approach Benefits + +- ✅ Local GPU workflows remain efficient +- ✅ Remote solve works with CPU-only clients +- ✅ Minimal code changes to existing solvers +- ✅ On-demand copy (only when needed) +- ✅ Clear separation of concerns + +## Files Involved + +| File | Description | +|------|-------------| +| `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp` | LP solution class declaration | +| `cpp/src/linear_programming/solver_solution.cu` | LP solution implementation + `to_host()` | +| `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp` | MIP solution class declaration | +| `cpp/src/mip/solver_solution.cu` | MIP solution implementation + `to_host()` | +| `cpp/cuopt_remote_server.cpp` | Server calls `to_host()` before serialization | +| `cpp/src/linear_programming/utilities/protobuf_serializer.cu` | Uses host accessors for serialization | + +## Summary + +The cuOpt solution memory architecture uses a **hybrid GPU/CPU approach**: + +1. **Primary storage**: GPU (`device_uvector`) for local solve performance +2. **Secondary storage**: CPU (`std::vector`) for remote solve and host access +3. **On-demand copying**: `to_host()` method copies GPU → CPU when needed +4. **Transparent to users**: Local users get GPU data, remote users get CPU data automatically diff --git a/docs/remote_solve_architecture.md b/docs/remote_solve_architecture.md new file mode 100644 index 000000000..157f93b17 --- /dev/null +++ b/docs/remote_solve_architecture.md @@ -0,0 +1,556 @@ +# cuOpt Remote Solve Architecture + +## Document Purpose + +This document describes the client-server architecture for cuOpt's remote solve capability. It is intended for security review and covers communication protocols, process architecture, data flow, and trust boundaries. + +--- + +## 1. 
System Overview + +The remote solve feature allows clients to submit optimization problems (LP/MIP) to a server for execution on GPU-accelerated hardware. The architecture supports both synchronous (blocking) and asynchronous (job-based) operation modes. + +### High-Level Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CLIENT PROCESS │ +│ ┌─────────────────┐ ┌──────────────────┐ ┌───────────────────────┐ │ +│ │ cuOpt Library │───▶│ Remote Serializer│───▶│ TCP Socket Client │ │ +│ │ (User Code) │ │ (Protobuf/Custom)│ │ │ │ +│ └─────────────────┘ └──────────────────┘ └───────────┬───────────┘ │ +└──────────────────────────────────────────────────────────────┼──────────────┘ + │ + TCP Connection │ Port 9090 + (Binary Protocol)│ + ▼ +┌──────────────────────────────────────────────────────────────┼──────────────┐ +│ SERVER PROCESS │ │ +│ ┌───────────────────────────────────────────────────────────┴───────────┐ │ +│ │ Main Server Thread │ │ +│ │ - Accept connections (thread-per-connection) │ │ +│ │ - Parse requests via pluggable serializer │ │ +│ │ - Route to sync handler or async job queue │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────┼─────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ │ +│ │ Result Retrieval │ │ Worker Monitor │ │ Connection │ │ +│ │ Thread │ │ Thread │ │ Handler Threads │ │ +│ └─────────┬─────────┘ └─────────┬─────────┘ └───────────────────┘ │ +│ │ │ │ +│ │ POSIX Shared Memory (Job Queue, Result Queue) │ +│ │ │ │ +│ ┌─────────┴──────────────────────┴─────────────────────────────────────┐ │ +│ │ Shared Memory Region │ │ +│ │ ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────┐ │ │ +│ │ │ Job Queue │ │ Result Queue │ │ Control Block │ │ │ +│ │ │ (MAX_JOBS=64) │ │ (MAX_RESULTS=64) │ │ (shutdown flag) │ │ │ +│ │ └─────────────────┘ └──────────────────┘ 
└─────────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────┼─────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ │ +│ │ Worker Process │ │ Worker Process │ │ Worker Process │ │ +│ │ (fork) │ │ (fork) │ │ (fork) │ │ +│ │ - GPU Solver │ │ - GPU Solver │ │ - GPU Solver │ │ +│ │ - Isolated │ │ - Isolated │ │ - Isolated │ │ +│ └───────────────────┘ └───────────────────┘ └───────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 2. Components + +### 2.1 Client Components + +| Component | Description | +|-----------|-------------| +| **cuOpt Library** | User-facing API (`solve_lp_remote`, `solve_mip_remote`) | +| **Remote Serializer** | Pluggable serialization (default: Protocol Buffers) | +| **TCP Client** | Socket connection to server, length-prefixed messages | + +### 2.2 Server Components + +| Component | Description | +|-----------|-------------| +| **Main Thread** | Accepts TCP connections, spawns handler threads | +| **Connection Handlers** | Per-connection threads that parse and route requests | +| **Result Retrieval Thread** | Polls shared memory for completed results | +| **Worker Monitor Thread** | Monitors worker processes via `waitpid()`, restarts dead workers | +| **Worker Processes** | Forked processes that execute GPU solves | +| **Shared Memory** | POSIX shared memory for IPC between main process and workers | + +--- + +## 3. 
Communication Protocol + +### 3.1 Transport Layer + +- **Protocol**: TCP/IP +- **Default Port**: 9090 (configurable via `-p` flag) +- **Encryption**: None (plaintext) - **Security Note: TLS not implemented** +- **Authentication**: None - **Security Note: No auth mechanism** + +### 3.2 Message Format + +All messages use a simple length-prefixed binary format: + +``` +┌────────────────┬─────────────────────────────────┐ +│ Length (4B) │ Payload (N bytes) │ +│ Little-endian │ Serialized Protobuf/Custom │ +└────────────────┴─────────────────────────────────┘ +``` + +### 3.3 Request Types + +| Code | Type | Description | +|------|------|-------------| +| 0 | `SUBMIT_JOB` | Submit optimization problem, receive job_id | +| 1 | `CHECK_STATUS` | Query job status by job_id | +| 2 | `GET_RESULT` | Retrieve solution for completed job | +| 3 | `DELETE_RESULT` | Delete job and free resources | +| 4 | `GET_LOGS` | Retrieve solver output logs | +| 5 | `CANCEL_JOB` | Cancel queued or running job | +| 6 | `WAIT_FOR_RESULT` | Block until job completes (no polling) | + +### 3.4 Job Status Codes + +| Code | Status | Description | +|------|--------|-------------| +| 0 | `QUEUED` | Job submitted, waiting for worker | +| 1 | `PROCESSING` | Worker is solving the problem | +| 2 | `COMPLETED` | Solution available | +| 3 | `FAILED` | Solve failed with error | +| 4 | `NOT_FOUND` | Job ID does not exist | +| 5 | `CANCELLED` | Job was cancelled by user | + +--- + +## 4. 
Sequence Diagrams + +### 4.1 Asynchronous Job Flow (Normal Case) + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (problem) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ Generate job_id │ + │ │ Write to Job Queue (shm) │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Poll Job Queue │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Claim Job │ + │ │ (set claimed=true, │ + │ │ worker_pid) │ + │ │──────────────────────────▶│ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (PROCESSING) │ Execute GPU Solve │ + │◀──────────────────────────│ │ + │ │ │ + │ ... │ ... │ + │ │ │ + │ │ Write Result (shm) │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result Retrieval Thread │ + │ │ updates job_tracker │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (COMPLETED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ DELETE_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (OK) │ │ + │◀──────────────────────────│ │ +``` + +### 4.2 WAIT_FOR_RESULT Flow (Blocking Wait) + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (problem) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ WAIT_FOR_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ Handler thread creates │ + │ │ JobWaiter with CV │ + │ │ │ + │ (connection held │ Thread blocks on │ + │ open, no response │ condition_variable.wait() │ + │ yet) │ │ + │ │ │ + │ │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ │ Write Result (shm) │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result thread signals │ + │ │ condition_variable │ + │ │ │ + │ │ Handler thread wakes │ + │ │ │ + │ Response (solution) │ │ + 
│◀──────────────────────────│ │ +``` + +### 4.3 Job Cancellation Flow + +``` +Client A Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB │ │ + │──────────────────────────▶│ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Worker claims job │ + │ │◀──────────────────────────│ + │ │ │ +Client B │ Solving... │ + │ │ │ + │ CANCEL_JOB (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ kill(worker_pid, SIGKILL) │ + │ │──────────────────────────▶│ + │ │ │ + │ │ Worker dies │ + │ │ ✗ + │ │ │ + │ Response (CANCELLED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Monitor thread detects │ + │ │ dead worker via waitpid() │ + │ │ │ + │ │ Restart worker (fork) │ + │ │ │ + │ │ New Worker │ + │ │◀──────────────────────────│ +``` + +### 4.4 Worker Crash Recovery + +``` +Server (Main) Worker Process + │ │ + │ Worker processing job │ + │◀────────────────────────────────────▶│ + │ │ + │ CRASH/SEGFAULT │ + │ ✗ + │ │ + │ Worker Monitor Thread │ + │ waitpid() returns │ + │ │ + │ Mark job as FAILED │ + │ Signal any waiting threads │ + │ │ + │ fork() new worker │ + │──────────────────────────────────────▶ + │ │ + │ New Worker Ready │ + │◀─────────────────────────────────────│ +``` + +--- + +## 5. 
Shared Memory Architecture

### 5.1 Memory Regions

Three POSIX shared memory segments are created:

| Name | Size | Purpose |
|------|------|---------|
| `/cuopt_job_queue` | ~64MB | Pending job entries |
| `/cuopt_result_queue` | ~64MB | Completed job results |
| `/cuopt_control` | ~64B | Shutdown flag, control signals |

### 5.2 Job Queue Entry Structure

```cpp
struct JobQueueEntry {
    char job_id[32];               // Unique job identifier
    uint32_t problem_type;         // 0=LP, 1=MIP
    uint32_t data_size;            // Size of serialized problem
    uint8_t data[MAX_JOB_DATA];    // Serialized problem data (~1MB)
    pid_t worker_pid;              // PID of worker processing this job
    std::atomic<bool> ready;       // Job ready for processing
    std::atomic<bool> claimed;     // Job claimed by a worker
    std::atomic<bool> cancelled;   // Job cancelled by user
};
```

### 5.3 Result Queue Entry Structure

```cpp
struct ResultQueueEntry {
    char job_id[32];               // Job identifier
    uint32_t status;               // 0=success, 1=error, 2=cancelled
    uint32_t data_size;            // Size of result data
    uint8_t data[MAX_RESULT_DATA]; // Serialized solution (~1MB)
    char error_message[256];       // Error message if failed
    std::atomic<bool> ready;       // Result ready for retrieval
    std::atomic<bool> retrieved;   // Result has been retrieved
};
```

---

## 6. Process Model

### 6.1 Process Hierarchy

```
cuopt_remote_server (main process)
├── Result Retrieval Thread
├── Worker Monitor Thread
├── Connection Handler Thread (per client)
├── Connection Handler Thread (per client)
│   ...
├── Worker Process 0 (forked)
├── Worker Process 1 (forked)
│   ... 
+└── Worker Process N (forked) +``` + +### 6.2 Worker Isolation + +- Each worker is a separate process created via `fork()` +- Workers have independent memory spaces (except shared memory regions) +- GPU context is isolated per worker process +- Worker crash does not affect main server or other workers +- Workers are automatically restarted by monitor thread + +### 6.3 Resource Limits + +| Resource | Current Limit | Notes | +|----------|---------------|-------| +| Max concurrent jobs | 64 (`MAX_JOBS`) | Compile-time constant | +| Max job data size | ~1MB (`MAX_JOB_DATA_SIZE`) | Per job | +| Max result data size | ~1MB (`MAX_RESULT_DATA_SIZE`) | Per job | +| Max workers | Configurable (`-w` flag) | Default: 1 | +| Connection timeout | None | **Security Note** | + +--- + +## 7. Security Considerations + +### 7.1 Current Security Posture + +| Aspect | Status | Risk Level | +|--------|--------|------------| +| Transport encryption (TLS) | Not implemented | **HIGH** | +| Client authentication | Not implemented | **HIGH** | +| Authorization | Not implemented | **MEDIUM** | +| Input validation | Basic size checks | **MEDIUM** | +| Rate limiting | Not implemented | **MEDIUM** | +| Connection limits | Not implemented | **MEDIUM** | +| Resource quotas | Not implemented | **LOW** | + +### 7.2 Trust Boundaries + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ UNTRUSTED ZONE │ +│ │ +│ ┌─────────────┐ │ +│ │ Client │ Network boundary (no encryption, no auth) │ +│ └──────┬──────┘ │ +│ │ │ +└──────────┼───────────────────────────────────────────────────────────┘ + │ TCP Port 9090 + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ TRUSTED ZONE │ +│ │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ Server Process │ │ +│ │ - All clients treated equally │ │ +│ │ - No per-user isolation │ │ +│ │ - Shared job queue visible to all workers │ │ +│ 
└──────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ Worker Processes │ │ +│ │ - Process-level isolation from each other │ │ +│ │ - No sandboxing (full system access) │ │ +│ │ - GPU access │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +### 7.3 Attack Surface + +| Attack Vector | Description | Mitigation | +|---------------|-------------|------------| +| Network eavesdropping | Plaintext TCP traffic | Requires TLS implementation | +| Unauthorized access | No authentication | Requires auth mechanism | +| Denial of service | No rate limiting, connection limits | Requires implementation | +| Malformed input | Invalid protobuf/msgpack | Protobuf parsing with error handling | +| Resource exhaustion | Large jobs, many connections | Size limits, but no connection limits | +| Job ID guessing | Sequential-ish IDs | Uses random hex (128-bit) | +| Worker escape | Malicious solver code | Workers are forked, not sandboxed | + +### 7.4 Recommended Security Enhancements + +1. **TLS/SSL**: Add transport encryption for all client-server communication +2. **Authentication**: Implement API key or certificate-based authentication +3. **Authorization**: Per-user job isolation, prevent access to other users' jobs +4. **Rate Limiting**: Limit requests per client per time window +5. **Connection Limits**: Maximum concurrent connections per IP +6. **Input Validation**: Deep validation of problem data before solving +7. **Audit Logging**: Log all operations with client identity +8. **Sandboxing**: Consider containerization or seccomp for workers + +--- + +## 8. 
Data Flow Summary + +### 8.1 Problem Data Flow + +``` +Client Problem Data + │ + ▼ +┌───────────────────┐ +│ Serialization │ Client-side: optimization_problem_t → protobuf bytes +└─────────┬─────────┘ + │ + ▼ (TCP) +┌───────────────────┐ +│ Job Queue │ Stored in shared memory (~1MB max) +└─────────┬─────────┘ + │ + ▼ (shm read) +┌───────────────────┐ +│ Worker Process │ Deserialize, create GPU data structures +└─────────┬─────────┘ + │ + ▼ +┌───────────────────┐ +│ GPU Solver │ PDLP or MIP solver execution +└─────────┬─────────┘ + │ + ▼ +┌───────────────────┐ +│ Solution │ GPU → Host copy, serialize to bytes +└─────────┬─────────┘ + │ + ▼ (shm write) +┌───────────────────┐ +│ Result Queue │ Stored in shared memory (~1MB max) +└─────────┬─────────┘ + │ + ▼ (TCP) +┌───────────────────┐ +│ Client │ Deserialize: protobuf bytes → solution_t +└───────────────────┘ +``` + +--- + +## 9. Configuration + +### 9.1 Server Command-Line Options + +``` +Usage: cuopt_remote_server [options] +Options: + -p PORT Port to listen on (default: 9090) + -w WORKERS Number of worker processes (default: 1) + -v Verbose logging + -q Quiet mode (minimal output) + --no-logs Disable log streaming feature +``` + +### 9.2 Environment Variables + +| Variable | Description | +|----------|-------------| +| `CUOPT_REMOTE_USE_SYNC` | Force synchronous mode (bypass job queue) | + +--- + +## 10. File Locations + +| Path | Description | +|------|-------------| +| `/tmp/cuopt_logs/log_{job_id}` | Per-job solver log files | +| `/dev/shm/cuopt_job_queue` | Job queue shared memory | +| `/dev/shm/cuopt_result_queue` | Result queue shared memory | +| `/dev/shm/cuopt_control` | Control block shared memory | + +--- + +## Appendix A: Protocol Buffer Schema + +See `cpp/src/linear_programming/utilities/cuopt_remote.proto` for the complete schema definition. 
+

Key messages:
- `AsyncRequest`: Wrapper for all request types
- `AsyncResponse`: Wrapper for all response types
- `OptimizationProblem`: LP/MIP problem definition
- `LPSolution` / `MIPSolution`: Solution data
- `PDLPSolverSettings` / `MIPSolverSettings`: Solver configuration

---

## Appendix B: Pluggable Serialization

The server supports custom serialization formats via a plugin interface:

```cpp
template <typename i_t, typename f_t>
class remote_serializer_t {
    // Serialize optimization problem
    virtual std::vector<uint8_t> serialize_lp_request(...) = 0;

    // Deserialize solution
    virtual lp_solution_t<i_t, f_t> deserialize_lp_solution(...) = 0;

    // ... additional methods for async protocol
};
```

Built-in serializers:
- **Protobuf** (default): High performance, schema-based
- **MsgPack**: Lightweight, schema-less alternative

---

*Document Version: 1.0*
*Last Updated: January 2026*

diff --git a/python/cuopt/cuopt/__init__.py b/python/cuopt/cuopt/__init__.py
index c6e9150c8..7ea141221 100644
--- a/python/cuopt/cuopt/__init__.py
+++ b/python/cuopt/cuopt/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 try: @@ -9,5 +9,23 @@ libcuopt.load_library() del libcuopt -from cuopt import linear_programming, routing from cuopt._version import __git_commit__, __version__, __version_major_minor__ + +# Lazy imports for linear_programming and routing modules +# This allows cuopt to be imported on CPU-only hosts when remote solve is configured +_submodules = ["linear_programming", "routing"] + + +def __getattr__(name): + """Lazy import submodules to support CPU-only hosts with remote solve.""" + if name in _submodules: + import importlib + return importlib.import_module(f"cuopt.{name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return __all__ + _submodules + + +__all__ = ["__git_commit__", "__version__", "__version_major_minor__"] diff --git a/python/cuopt/cuopt/linear_programming/__init__.py b/python/cuopt/cuopt/linear_programming/__init__.py index d267c2171..c16e7bd7f 100644 --- a/python/cuopt/cuopt/linear_programming/__init__.py +++ b/python/cuopt/cuopt/linear_programming/__init__.py @@ -1,9 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 from cuopt.linear_programming import internals from cuopt.linear_programming.data_model import DataModel from cuopt.linear_programming.problem import Problem +from cuopt.linear_programming.remote import CancelResult, JobStatus, cancel_job from cuopt.linear_programming.solution import Solution from cuopt.linear_programming.solver import BatchSolve, Solve from cuopt.linear_programming.solver_settings import ( diff --git a/python/cuopt/cuopt/linear_programming/remote.py b/python/cuopt/cuopt/linear_programming/remote.py new file mode 100644 index 000000000..70a4ae382 --- /dev/null +++ b/python/cuopt/cuopt/linear_programming/remote.py @@ -0,0 +1,214 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Remote solve utilities for cuOpt. + +This module provides functions for interacting with a remote cuopt_remote_server, +including job management operations like cancellation. 
+""" + +import os +import socket +import struct +from dataclasses import dataclass +from enum import IntEnum +from typing import Optional + +# Try to import the protobuf module - may not be available in all environments +try: + import sys + + # Add build directory for development + build_path = os.path.join( + os.path.dirname(__file__), "..", "..", "..", "..", "..", "cpp", "build" + ) + if os.path.exists(build_path): + sys.path.insert(0, os.path.abspath(build_path)) + import cuopt_remote_pb2 as pb + + _HAS_PROTOBUF = True +except ImportError: + _HAS_PROTOBUF = False + + +class JobStatus(IntEnum): + """Status of a remote job.""" + + QUEUED = 0 # Job is waiting in queue + PROCESSING = 1 # Job is being processed by a worker + COMPLETED = 2 # Job completed successfully + FAILED = 3 # Job failed with an error + NOT_FOUND = 4 # Job ID not found on server + CANCELLED = 5 # Job was cancelled + + +@dataclass +class CancelResult: + """Result of a cancel job request.""" + + success: bool + message: str + job_status: JobStatus + + +def get_remote_config() -> Optional[tuple]: + """Get remote server configuration from environment variables. + + Returns + ------- + tuple or None + (host, port) tuple if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set, + None otherwise. + """ + host = os.environ.get("CUOPT_REMOTE_HOST", "") + port = os.environ.get("CUOPT_REMOTE_PORT", "") + + if host and port: + try: + return (host, int(port)) + except ValueError: + return None + return None + + +def cancel_job( + job_id: str, host: Optional[str] = None, port: Optional[int] = None +) -> CancelResult: + """Cancel a job on a remote cuopt_remote_server. + + This function can cancel jobs that are queued (waiting for a worker) or + currently running. For running jobs, the worker process is killed and + automatically restarted by the server. + + Parameters + ---------- + job_id : str + The job ID to cancel (e.g., "job_1234567890abcdef") + host : str, optional + Server hostname. 
If not provided, uses CUOPT_REMOTE_HOST environment variable. + port : int, optional + Server port. If not provided, uses CUOPT_REMOTE_PORT environment variable. + + Returns + ------- + CancelResult + Result containing success status, message, and job status after cancellation. + + Raises + ------ + RuntimeError + If protobuf module is not available or connection fails. + ValueError + If host/port are not provided and environment variables are not set. + + Examples + -------- + >>> # Using environment variables + >>> import os + >>> os.environ['CUOPT_REMOTE_HOST'] = 'localhost' + >>> os.environ['CUOPT_REMOTE_PORT'] = '9090' + >>> result = cancel_job("job_1234567890abcdef") + >>> print(result.success, result.message) + + >>> # Explicitly specifying host and port + >>> result = cancel_job("job_1234567890abcdef", host="192.168.1.100", port=9090) + """ + if not _HAS_PROTOBUF: + raise RuntimeError( + "Protobuf module not available. Please install protobuf or ensure " + "cuopt_remote_pb2.py is in the Python path." + ) + + # Get host/port from parameters or environment + if host is None or port is None: + config = get_remote_config() + if config is None: + raise ValueError( + "Host and port must be provided or set via CUOPT_REMOTE_HOST " + "and CUOPT_REMOTE_PORT environment variables." + ) + if host is None: + host = config[0] + if port is None: + port = config[1] + + # Create cancel request + request = pb.AsyncRequest() + request.request_type = pb.CANCEL_JOB + request.job_id = job_id + + try: + # Connect to server + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(30.0) # 30 second timeout + sock.connect((host, port)) + + # Send request (length-prefixed) + data = request.SerializeToString() + sock.sendall(struct.pack("=0.0.0a0", "librmm==26.2.*,>=0.0.0a0", "ninja", + "protobuf", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.