From 652596183678e423cd978bb5c069a14718d4e5ea Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 22 Dec 2025 15:34:18 -0500 Subject: [PATCH 01/37] Add data_model_view_t alias in cuopt::linear_programming namespace This adds a header that provides convenient aliases for data_model_view_t and span in the cuopt::linear_programming namespace. The canonical implementation remains in cuopt::mps_parser to maintain mps_parser as a standalone library. --- .../linear_programming/data_model_view.hpp | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 cpp/include/cuopt/linear_programming/data_model_view.hpp diff --git a/cpp/include/cuopt/linear_programming/data_model_view.hpp b/cpp/include/cuopt/linear_programming/data_model_view.hpp new file mode 100644 index 000000000..413987227 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/data_model_view.hpp @@ -0,0 +1,58 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +/** + * @file data_model_view.hpp + * @brief Provides data_model_view_t in the cuopt::linear_programming namespace. + * + * This header provides access to the data_model_view_t class, a non-owning view + * over LP/MIP problem data. The view uses span to hold pointers that can + * reference either host or device memory, making it suitable for both local + * GPU-based solves and remote CPU-based solves. + * + * The canonical implementation lives in cuopt::mps_parser for historical reasons + * and to maintain mps_parser as a standalone library. This header provides + * convenient aliases in the cuopt::linear_programming namespace. + */ + +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Non-owning span type that can point to either host or device memory. + * + * This is an alias to the span type defined in mps_parser. 
The span holds + * a pointer and size, but does not own the underlying memory. + * + * @tparam T Element type + */ +template +using span = cuopt::mps_parser::span; + +/** + * @brief Non-owning view of LP/MIP problem data. + * + * This is an alias to the data_model_view_t defined in mps_parser. + * The view stores problem data (constraint matrix, bounds, objective, etc.) + * as span members, which can point to either host or device memory. + * + * Key features for remote solve support: + * - Non-owning: does not allocate or free memory + * - Memory-agnostic: spans can point to host OR device memory + * - Serializable: host data can be directly serialized for remote solve + * + * @tparam i_t Integer type for indices (typically int) + * @tparam f_t Floating point type for values (typically float or double) + */ +template +using data_model_view_t = cuopt::mps_parser::data_model_view_t; + +} // namespace cuopt::linear_programming From bd1f474fd67dcb0acd23936064221ddf68cc80a3 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Wed, 24 Dec 2025 10:56:17 -0500 Subject: [PATCH 02/37] C API: Optimized memory storage for local vs remote solve - Refactored problem_and_stream_view_t to use unique_ptr for both CPU and GPU storage - Check is_remote_solve_enabled() at problem creation time - Local solve: store problem directly in GPU memory (optimization_problem_t) - Remote solve: store problem in CPU memory (problem_cpu_data_t) - cuOptSolve routes to appropriate path based on is_remote_configured flag - Updated all cuOptGet* functions to read from correct storage - No memory overhead: only one storage type is allocated per problem - Maintains backward compatibility with existing C API --- cpp/cuopt_cli.cpp | 135 ++- .../cuopt/linear_programming/solve.hpp | 72 ++ .../utilities/remote_solve.hpp | 57 ++ cpp/src/linear_programming/cuopt_c.cpp | 937 +++++++++++++----- cpp/src/linear_programming/solve.cu | 313 +++++- cpp/src/mip/solve.cu | 123 +++ 6 files changed, 1376 insertions(+), 261 
deletions(-) create mode 100644 cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 5023cefc6..a3bc29eac 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -5,6 +5,7 @@ */ /* clang-format on */ +#include #include #include #include @@ -66,6 +67,108 @@ static char cuda_module_loading_env[] = "CUDA_MODULE_LOADING=EAGER"; */ inline auto make_async() { return std::make_shared(); } +/** + * @brief Create a data_model_view_t from mps_data_model_t + * + * This creates a non-owning view with spans pointing to the CPU data in the mps_data_model. + * Used for remote solve where data stays in CPU memory. + * + * @param mps_data_model The owning mps_data_model_t + * @return data_model_view_t with spans pointing to the mps_data_model's vectors + */ +template +cuopt::linear_programming::data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + cuopt::linear_programming::data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + 
view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + 
view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + if (mps_data_model.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(mps_data_model.get_quadratic_objective_values().data(), + mps_data_model.get_quadratic_objective_values().size(), + mps_data_model.get_quadratic_objective_indices().data(), + mps_data_model.get_quadratic_objective_indices().size(), + mps_data_model.get_quadratic_objective_offsets().data(), + mps_data_model.get_quadratic_objective_offsets().size()); + } + + return view; +} + /** * @brief Handle logger when error happens before logger is initialized * @param settings Solver settings @@ -122,13 +225,15 @@ int run_single_file(const std::string& file_path, return -1; } - auto op_problem = - cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model); - - const bool is_mip = - (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP || - op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) && - !solve_relaxation; + // Determine if this is a MIP problem by checking variable types + bool has_integers = false; + for (const auto& vt : mps_data_model.get_variable_types()) { + if (vt == 'I' || vt == 'B') { + has_integers = true; + break; + } + } + const bool is_mip = has_integers && !solve_relaxation; try { auto initial_solution = @@ -154,13 +259,25 @@ int run_single_file(const std::string& file_path, return -1; } + // Create a non-owning view from the mps_data_model + // solve_lp/solve_mip will handle remote vs local solve based on env vars + auto view = create_view_from_mps_data_model(mps_data_model); + try { if (is_mip) { auto& mip_settings = settings.get_mip_settings(); - auto solution = cuopt::linear_programming::solve_mip(op_problem, mip_settings); + auto solution = cuopt::linear_programming::solve_mip(&handle_, view, mip_settings); + if 
(solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + CUOPT_LOG_ERROR("MIP solve failed: %s", solution.get_error_status().what()); + return -1; + } } else { auto& lp_settings = settings.get_pdlp_settings(); - auto solution = cuopt::linear_programming::solve_lp(op_problem, lp_settings); + auto solution = cuopt::linear_programming::solve_lp(&handle_, view, lp_settings); + if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what()); + return -1; + } } } catch (const std::exception& e) { CUOPT_LOG_ERROR("Error: %s", e.what()); diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 364fee30a..e06c69a03 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -107,4 +109,74 @@ optimization_problem_t mps_data_model_to_optimization_problem( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model); +/** + * @brief Convert a data_model_view_t to an optimization_problem_t. + * + * This function copies data from the view (which points to GPU memory) + * into an owning optimization_problem_t. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. 
+ * @param[in] view A data_model_view_t object with spans pointing to GPU memory + * @return optimization_problem_t owning container for the problem + */ +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view); + +/** + * @brief Linear programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). + * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @note Both primal and dual solutions are zero-initialized. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A pdlp_solver_settings_t object with the settings for the PDLP + * solver. + * @param[in] problem_checking If true, the problem is checked for consistency. + * @param[in] use_pdlp_solver_mode If true, the PDLP hyperparameters coming from the + * pdlp_solver_mode are used. + * @return optimization_problem_solution_t owning container for the solver solution + */ +template +optimization_problem_solution_t solve_lp( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, + bool problem_checking = true, + bool use_pdlp_solver_mode = true); + +/** + * @brief Mixed integer programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). 
+ * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A mip_solver_settings_t object with the settings for the MIP + * solver. + * @return mip_solution_t owning container for the solver solution + */ +template +mip_solution_t solve_mip( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + mip_solver_settings_t const& settings = mip_solver_settings_t{}); + } // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp new file mode 100644 index 000000000..dab725823 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -0,0 +1,57 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Configuration for remote solve connection + */ +struct remote_solve_config_t { + std::string host; + int port; +}; + +/** + * @brief Check if remote solve is enabled via environment variables. + * + * Remote solve is enabled when both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT + * environment variables are set. 
+ * + * @return std::optional containing the remote config if + * remote solve is enabled, std::nullopt otherwise + */ +inline std::optional get_remote_solve_config() +{ + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + + if (host != nullptr && port != nullptr && host[0] != '\0' && port[0] != '\0') { + try { + int port_num = std::stoi(port); + return remote_solve_config_t{std::string(host), port_num}; + } catch (...) { + // Invalid port number, fall back to local solve + return std::nullopt; + } + } + return std::nullopt; +} + +/** + * @brief Check if remote solve is enabled. + * + * @return true if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are both set + */ +inline bool is_remote_solve_enabled() { return get_remote_solve_config().has_value(); } + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index 0772dd14b..f40f5928c 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -7,9 +7,11 @@ #include +#include #include #include #include +#include #include #include @@ -17,20 +19,166 @@ #include +#include + #include #include #include +#include using namespace cuopt::mps_parser; using namespace cuopt::linear_programming; +/** + * @brief CPU-side storage for problem data. + * + * This struct stores all problem data in CPU memory. At solve time, a data_model_view_t + * is created pointing to this data, and the solve_lp/solve_mip routines handle + * local vs remote solve automatically. 
+ */ +struct problem_cpu_data_t { + // Problem dimensions + cuopt_int_t num_constraints = 0; + cuopt_int_t num_variables = 0; + + // Objective + bool maximize = false; + cuopt_float_t objective_offset = 0.0; + std::vector objective_coefficients; + + // Quadratic objective (optional) + std::vector Q_values; + std::vector Q_indices; + std::vector Q_offsets; + + // Constraint matrix (CSR format) + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + + // Constraint bounds (two representations) + std::vector row_types; // '<', '>', '=' style + std::vector constraint_bounds; // single RHS for row_types style + std::vector constraint_lower_bounds; // ranged style + std::vector constraint_upper_bounds; // ranged style + bool uses_ranged_constraints = false; + + // Variable bounds + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + + // Variable types + std::vector variable_types; // 'C' for continuous, 'I' for integer + + /** + * @brief Create a data_model_view_t pointing to this CPU data. 
+ */ + cuopt::linear_programming::data_model_view_t create_view() const + { + cuopt::linear_programming::data_model_view_t view; + + view.set_maximize(maximize); + view.set_objective_offset(objective_offset); + + if (!objective_coefficients.empty()) { + view.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + + if (!Q_values.empty()) { + view.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + } + + if (!A_values.empty()) { + view.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + + if (uses_ranged_constraints) { + if (!constraint_lower_bounds.empty()) { + view.set_constraint_lower_bounds(constraint_lower_bounds.data(), + constraint_lower_bounds.size()); + } + if (!constraint_upper_bounds.empty()) { + view.set_constraint_upper_bounds(constraint_upper_bounds.data(), + constraint_upper_bounds.size()); + } + } else { + if (!row_types.empty()) { view.set_row_types(row_types.data(), row_types.size()); } + if (!constraint_bounds.empty()) { + view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + } + + if (!variable_lower_bounds.empty()) { + view.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + + if (!variable_upper_bounds.empty()) { + view.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + + if (!variable_types.empty()) { + view.set_variable_types(variable_types.data(), variable_types.size()); + } + + return view; + } + + /** + * @brief Check if this is a MIP (has integer variables). 
+ */ + bool is_mip() const + { + for (char vt : variable_types) { + if (vt == CUOPT_INTEGER) { return true; } + } + return false; + } +}; + struct problem_and_stream_view_t { problem_and_stream_view_t() - : op_problem(nullptr), stream_view(rmm::cuda_stream_per_thread), handle(stream_view) + : is_remote_configured(false), + cpu_data(nullptr), + gpu_problem(nullptr), + stream_view(rmm::cuda_stream_per_thread), + handle(stream_view) { } + raft::handle_t* get_handle_ptr() { return &handle; } - cuopt::linear_programming::optimization_problem_t* op_problem; + + /** + * @brief Check if this is a MIP problem. + */ + bool is_mip() const + { + if (is_remote_configured) { + return cpu_data && cpu_data->is_mip(); + } else { + if (!gpu_problem) return false; + auto cat = gpu_problem->get_problem_category(); + return (cat == problem_category_t::MIP) || (cat == problem_category_t::IP); + } + } + + // Remote solve configured at creation time + bool is_remote_configured; + + // Only ONE of these is allocated (optimized memory usage): + std::unique_ptr cpu_data; // for remote solve (CPU memory) + std::unique_ptr> + gpu_problem; // for local solve (GPU memory) + rmm::cuda_stream_view stream_view; raft::handle_t handle; }; @@ -77,6 +225,7 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro parse_mps(filename_str, input_mps_strict)); } catch (const std::exception& e) { CUOPT_LOG_INFO("Error parsing MPS file: %s", e.what()); + delete problem_and_stream; *problem_ptr = nullptr; if (std::string(e.what()).find("Error opening MPS file") != std::string::npos) { return CUOPT_MPS_FILE_ERROR; @@ -84,11 +233,59 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro return CUOPT_MPS_PARSE_ERROR; } } - optimization_problem_t* op_problem = - new optimization_problem_t(mps_data_model_to_optimization_problem( - problem_and_stream->get_handle_ptr(), *mps_data_model_ptr)); - problem_and_stream->op_problem = op_problem; - *problem_ptr = 
static_cast(problem_and_stream); + + // Check remote solve configuration at creation time + problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + + if (problem_and_stream->is_remote_configured) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + const auto& mps = *mps_data_model_ptr; + + cpu_data.num_constraints = + static_cast(mps.get_constraint_matrix_offsets().size() - 1); + cpu_data.num_variables = static_cast(mps.get_objective_coefficients().size()); + cpu_data.maximize = mps.get_sense(); + cpu_data.objective_offset = mps.get_objective_offset(); + + cpu_data.objective_coefficients = mps.get_objective_coefficients(); + cpu_data.A_values = mps.get_constraint_matrix_values(); + cpu_data.A_indices = mps.get_constraint_matrix_indices(); + cpu_data.A_offsets = mps.get_constraint_matrix_offsets(); + + if (!mps.get_constraint_lower_bounds().empty() || !mps.get_constraint_upper_bounds().empty()) { + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds = mps.get_constraint_lower_bounds(); + cpu_data.constraint_upper_bounds = mps.get_constraint_upper_bounds(); + } else { + cpu_data.uses_ranged_constraints = false; + cpu_data.constraint_bounds = mps.get_constraint_bounds(); + const auto& mps_row_types = mps.get_row_types(); + cpu_data.row_types.resize(mps_row_types.size()); + for (size_t i = 0; i < mps_row_types.size(); ++i) { + cpu_data.row_types[i] = mps_row_types[i]; + } + } + + cpu_data.variable_lower_bounds = mps.get_variable_lower_bounds(); + cpu_data.variable_upper_bounds = mps.get_variable_upper_bounds(); + + const auto& mps_var_types = mps.get_variable_types(); + cpu_data.variable_types.resize(mps_var_types.size()); + for (size_t i = 0; i < mps_var_types.size(); ++i) { + cpu_data.variable_types[i] = + (mps_var_types[i] == 'I' || mps_var_types[i] == 'B') ? 
CUOPT_INTEGER : CUOPT_CONTINUOUS; + } + } else { + // Local: store in GPU memory using existing mps_data_model_to_optimization_problem + problem_and_stream->gpu_problem = + std::make_unique>( + mps_data_model_to_optimization_problem(problem_and_stream->get_handle_ptr(), + *mps_data_model_ptr)); + } + + *problem_ptr = static_cast(problem_and_stream); return CUOPT_SUCCESS; } @@ -118,32 +315,70 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + + if (problem_and_stream->is_remote_configured) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, num_constraints); + gpu_problem.set_constraint_bounds(rhs, 
num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Convert variable types to enum + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -175,34 +410,73 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, - num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - 
problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + + if (problem_and_stream->is_remote_configured) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + 
gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -239,34 +513,77 @@ cuopt_int_t cuOptCreateQuadraticProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - 
quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (problem_and_stream->is_remote_configured) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + 
cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, num_constraints); + gpu_problem.set_constraint_bounds(rhs, num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -304,36 +621,81 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - 
problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, - num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (problem_and_stream->is_remote_configured) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + 
num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + 
constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -343,7 +705,9 @@ void cuOptDestroyProblem(cuOptOptimizationProblem* problem_ptr) { if (problem_ptr == nullptr) { return; } if (*problem_ptr == nullptr) { return; } - delete static_cast(*problem_ptr); + problem_and_stream_view_t* problem_and_stream = + static_cast(*problem_ptr); + delete problem_and_stream; *problem_ptr = nullptr; } @@ -354,7 +718,11 @@ cuopt_int_t cuOptGetNumConstraints(cuOptOptimizationProblem problem, if (num_constraints_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_constraints_ptr = problem_and_stream_view->op_problem->get_n_constraints(); + if (problem_and_stream_view->is_remote_configured) { + *num_constraints_ptr = problem_and_stream_view->cpu_data->num_constraints; + } else { + *num_constraints_ptr = problem_and_stream_view->gpu_problem->get_n_constraints(); + } return CUOPT_SUCCESS; } @@ -364,7 +732,11 @@ cuopt_int_t cuOptGetNumVariables(cuOptOptimizationProblem problem, cuopt_int_t* if (num_variables_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_variables_ptr = problem_and_stream_view->op_problem->get_n_variables(); + if (problem_and_stream_view->is_remote_configured) { + *num_variables_ptr = 
problem_and_stream_view->cpu_data->num_variables; + } else { + *num_variables_ptr = problem_and_stream_view->gpu_problem->get_n_variables(); + } return CUOPT_SUCCESS; } @@ -375,8 +747,13 @@ cuopt_int_t cuOptGetObjectiveSense(cuOptOptimizationProblem problem, if (objective_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_sense_ptr = - problem_and_stream_view->op_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + if (problem_and_stream_view->is_remote_configured) { + *objective_sense_ptr = + problem_and_stream_view->cpu_data->maximize ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } else { + *objective_sense_ptr = + problem_and_stream_view->gpu_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } return CUOPT_SUCCESS; } @@ -387,7 +764,11 @@ cuopt_int_t cuOptGetObjectiveOffset(cuOptOptimizationProblem problem, if (objective_offset_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_offset_ptr = problem_and_stream_view->op_problem->get_objective_offset(); + if (problem_and_stream_view->is_remote_configured) { + *objective_offset_ptr = problem_and_stream_view->cpu_data->objective_offset; + } else { + *objective_offset_ptr = problem_and_stream_view->gpu_problem->get_objective_offset(); + } return CUOPT_SUCCESS; } @@ -398,13 +779,17 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem, if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& objective_coefficients = - problem_and_stream_view->op_problem->get_objective_coefficients(); - raft::copy(objective_coefficients_ptr, - objective_coefficients.data(), - objective_coefficients.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + if 
(problem_and_stream_view->is_remote_configured) { + const auto& coeffs = problem_and_stream_view->cpu_data->objective_coefficients; + std::copy(coeffs.begin(), coeffs.end(), objective_coefficients_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(objective_coefficients_ptr, + gpu_problem.get_objective_coefficients().data(), + gpu_problem.get_n_variables(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -415,7 +800,13 @@ cuopt_int_t cuOptGetNumNonZeros(cuOptOptimizationProblem problem, if (num_non_zero_elements_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_non_zero_elements_ptr = problem_and_stream_view->op_problem->get_nnz(); + if (problem_and_stream_view->is_remote_configured) { + *num_non_zero_elements_ptr = + static_cast(problem_and_stream_view->cpu_data->A_values.size()); + } else { + *num_non_zero_elements_ptr = static_cast( + problem_and_stream_view->gpu_problem->get_constraint_matrix_values().size()); + } return CUOPT_SUCCESS; } @@ -430,25 +821,32 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem, if (constraint_matrix_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_matrix_coefficients = - problem_and_stream_view->op_problem->get_constraint_matrix_values(); - const rmm::device_uvector& constraint_matrix_column_indices = - problem_and_stream_view->op_problem->get_constraint_matrix_indices(); - const rmm::device_uvector& constraint_matrix_row_offsets = - problem_and_stream_view->op_problem->get_constraint_matrix_offsets(); - raft::copy(constraint_matrix_coefficients_ptr, - constraint_matrix_coefficients.data(), - constraint_matrix_coefficients.size(), - problem_and_stream_view->stream_view); - 
raft::copy(constraint_matrix_column_indices_ptr, - constraint_matrix_column_indices.data(), - constraint_matrix_column_indices.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_row_offsets_ptr, - constraint_matrix_row_offsets.data(), - constraint_matrix_row_offsets.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& cpu_data = *problem_and_stream_view->cpu_data; + std::copy( + cpu_data.A_values.begin(), cpu_data.A_values.end(), constraint_matrix_coefficients_ptr); + std::copy( + cpu_data.A_indices.begin(), cpu_data.A_indices.end(), constraint_matrix_column_indices_ptr); + std::copy( + cpu_data.A_offsets.begin(), cpu_data.A_offsets.end(), constraint_matrix_row_offsets_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto stream = gpu_problem.get_handle_ptr()->get_stream(); + raft::copy(constraint_matrix_coefficients_ptr, + gpu_problem.get_constraint_matrix_values().data(), + gpu_problem.get_constraint_matrix_values().size(), + stream); + raft::copy(constraint_matrix_column_indices_ptr, + gpu_problem.get_constraint_matrix_indices().data(), + gpu_problem.get_constraint_matrix_indices().size(), + stream); + raft::copy(constraint_matrix_row_offsets_ptr, + gpu_problem.get_constraint_matrix_offsets().data(), + gpu_problem.get_constraint_matrix_offsets().size(), + stream); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -458,13 +856,18 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons if (constraint_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_sense = - problem_and_stream_view->op_problem->get_row_types(); - raft::copy(constraint_sense_ptr, - constraint_sense.data(), - constraint_sense.size(), - 
problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& row_types = problem_and_stream_view->cpu_data->row_types; + std::copy(row_types.begin(), row_types.end(), constraint_sense_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(constraint_sense_ptr, + gpu_problem.get_row_types().data(), + gpu_problem.get_row_types().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -475,10 +878,18 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem, if (rhs_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& rhs = - problem_and_stream_view->op_problem->get_constraint_bounds(); - raft::copy(rhs_ptr, rhs.data(), rhs.size(), problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_bounds; + std::copy(bounds.begin(), bounds.end(), rhs_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(rhs_ptr, + gpu_problem.get_constraint_bounds().data(), + gpu_problem.get_constraint_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -489,13 +900,18 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - problem_and_stream_view->op_problem->get_constraint_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - 
lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_constraint_lower_bounds().data(), + gpu_problem.get_constraint_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -506,13 +922,18 @@ cuopt_int_t cuOptGetConstraintUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_constraint_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_constraint_upper_bounds().data(), + gpu_problem.get_constraint_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -523,13 +944,18 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - 
problem_and_stream_view->op_problem->get_variable_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_variable_lower_bounds().data(), + gpu_problem.get_variable_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -540,13 +966,18 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_variable_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (problem_and_stream_view->is_remote_configured) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_variable_upper_bounds().data(), + gpu_problem.get_variable_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -556,17 +987,24 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab if (variable_types_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } 
problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& variable_types = - problem_and_stream_view->op_problem->get_variable_types(); - std::vector variable_types_host(variable_types.size()); - raft::copy(variable_types_host.data(), - variable_types.data(), - variable_types.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); - for (size_t j = 0; j < variable_types_host.size(); j++) { - variable_types_ptr[j] = - variable_types_host[j] == var_t::INTEGER ? CUOPT_INTEGER : CUOPT_CONTINUOUS; + + if (problem_and_stream_view->is_remote_configured) { + const auto& var_types = problem_and_stream_view->cpu_data->variable_types; + std::copy(var_types.begin(), var_types.end(), variable_types_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto num_vars = gpu_problem.get_n_variables(); + std::vector gpu_var_types(num_vars); + raft::copy(gpu_var_types.data(), + gpu_problem.get_variable_types().data(), + num_vars, + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + // Convert from var_t enum to char + for (cuopt_int_t i = 0; i < num_vars; ++i) { + variable_types_ptr[i] = + (gpu_var_types[i] == var_t::CONTINUOUS) ? 
CUOPT_CONTINUOUS : CUOPT_INTEGER; + } } return CUOPT_SUCCESS; } @@ -712,10 +1150,7 @@ cuopt_int_t cuOptIsMIP(cuOptOptimizationProblem problem, cuopt_int_t* is_mip_ptr if (is_mip_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - bool is_mip = - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP) || - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP); - *is_mip_ptr = static_cast(is_mip); + *is_mip_ptr = static_cast(problem_and_stream_view->is_mip()); return CUOPT_SUCCESS; } @@ -728,44 +1163,92 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, if (problem == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (settings == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (solution_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } + problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP || - problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP) { - solver_settings_t* solver_settings = - static_cast*>(settings); - mip_solver_settings_t& mip_settings = - solver_settings->get_mip_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); - solution_and_stream_view->mip_solution_ptr = new mip_solution_t( - solve_mip(*op_problem, mip_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + solver_settings_t* solver_settings = + static_cast*>(settings); + + bool is_mip = problem_and_stream_view->is_mip(); + + if 
(problem_and_stream_view->is_remote_configured) { + // Remote path: use data_model_view_t from CPU data + auto view = problem_and_stream_view->cpu_data->create_view(); + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); + + solution_and_stream_view->mip_solution_ptr = + new mip_solution_t(solve_mip( + problem_and_stream_view->get_handle_ptr(), view, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp( + problem_and_stream_view->get_handle_ptr(), view, pdlp_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } else { - solver_settings_t* solver_settings = - static_cast*>(settings); - pdlp_solver_settings_t& pdlp_settings = - solver_settings->get_pdlp_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); - solution_and_stream_view->lp_solution_ptr = - new optimization_problem_solution_t( - solve_lp(*op_problem, pdlp_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - 
cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + // Local path: use GPU problem directly (already in GPU memory) + auto& gpu_problem = *problem_and_stream_view->gpu_problem; + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); + + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(gpu_problem, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp(gpu_problem, pdlp_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index d038ade72..43e14015f 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -40,7 +40,8 @@ #include #include -#include // For std::thread +#include // For std::memcpy +#include // For std::thread namespace cuopt::linear_programming { @@ -1040,6 +1041,91 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op return op_problem; } +// Helper to create a data_model_view_t from mps_data_model_t 
(for remote solve path) +template +static data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if 
(!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + return view; +} + template optimization_problem_solution_t solve_lp( raft::handle_t const* handle_ptr, @@ -1048,34 +1134,211 @@ optimization_problem_solution_t solve_lp( bool problem_checking, bool use_pdlp_solver_mode) { + // Check if remote solve is configured - if so, use the view-based path + if (is_remote_solve_enabled()) { + auto view = create_view_from_mps_data_model(mps_data_model); + return solve_lp(handle_ptr, view, settings, problem_checking, use_pdlp_solver_mode); + } + + // Local solve: convert directly to optimization_problem_t (no extra copy) auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } -#define INSTANTIATE(F_TYPE) \ - template 
optimization_problem_solution_t solve_lp( \ - optimization_problem_t& op_problem, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode, \ - bool is_batch_mode); \ - \ - template optimization_problem_solution_t solve_lp( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode); \ - \ - template optimization_problem_solution_t solve_lp_with_method( \ - detail::problem_t& problem, \ - pdlp_solver_settings_t const& settings, \ - const timer_t& timer, \ - bool is_batch_mode); \ - \ - template optimization_problem_t mps_data_model_to_optimization_problem( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& data_model); \ +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view) +{ + optimization_problem_t op_problem(handle_ptr); + op_problem.set_maximize(view.get_sense()); + + op_problem.set_csr_constraint_matrix(view.get_constraint_matrix_values().data(), + view.get_constraint_matrix_values().size(), + view.get_constraint_matrix_indices().data(), + view.get_constraint_matrix_indices().size(), + view.get_constraint_matrix_offsets().data(), + view.get_constraint_matrix_offsets().size()); + + if (view.get_constraint_bounds().size() != 0) { + op_problem.set_constraint_bounds(view.get_constraint_bounds().data(), + view.get_constraint_bounds().size()); + } + if (view.get_objective_coefficients().size() != 0) { + op_problem.set_objective_coefficients(view.get_objective_coefficients().data(), + view.get_objective_coefficients().size()); + } + op_problem.set_objective_scaling_factor(view.get_objective_scaling_factor()); + op_problem.set_objective_offset(view.get_objective_offset()); + if (view.get_variable_lower_bounds().size() != 0) { + 
op_problem.set_variable_lower_bounds(view.get_variable_lower_bounds().data(), + view.get_variable_lower_bounds().size()); + } + if (view.get_variable_upper_bounds().size() != 0) { + op_problem.set_variable_upper_bounds(view.get_variable_upper_bounds().data(), + view.get_variable_upper_bounds().size()); + } + if (view.get_variable_types().size() != 0) { + auto var_types = view.get_variable_types(); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = cudaPointerGetAttributes(&attrs, var_types.data()); + + std::vector host_var_types(var_types.size()); + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(host_var_types.data(), + var_types.data(), + var_types.size() * sizeof(char), + cudaMemcpyDeviceToHost); + } else { + // Source is on host (or unregistered) - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(host_var_types.data(), var_types.data(), var_types.size() * sizeof(char)); + } + + std::vector enum_variable_types(var_types.size()); + for (std::size_t i = 0; i < var_types.size(); ++i) { + enum_variable_types[i] = host_var_types[i] == 'I' ? 
var_t::INTEGER : var_t::CONTINUOUS; + } + op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); + } + + if (view.get_row_types().size() != 0) { + op_problem.set_row_types(view.get_row_types().data(), view.get_row_types().size()); + } + if (view.get_constraint_lower_bounds().size() != 0) { + op_problem.set_constraint_lower_bounds(view.get_constraint_lower_bounds().data(), + view.get_constraint_lower_bounds().size()); + } + if (view.get_constraint_upper_bounds().size() != 0) { + op_problem.set_constraint_upper_bounds(view.get_constraint_upper_bounds().data(), + view.get_constraint_upper_bounds().size()); + } + + if (view.get_objective_name().size() != 0) { + op_problem.set_objective_name(view.get_objective_name()); + } + if (view.get_problem_name().size() != 0) { + op_problem.set_problem_name(view.get_problem_name().data()); + } + if (view.get_variable_names().size() != 0) { + op_problem.set_variable_names(view.get_variable_names()); + } + if (view.get_row_names().size() != 0) { op_problem.set_row_names(view.get_row_names()); } + + if (view.has_quadratic_objective()) { + // Copy quadratic objective from view to vectors first since we need host data + std::vector Q_values(view.get_quadratic_objective_values().size()); + std::vector Q_indices(view.get_quadratic_objective_indices().size()); + std::vector Q_offsets(view.get_quadratic_objective_offsets().size()); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = + cudaPointerGetAttributes(&attrs, view.get_quadratic_objective_values().data()); + + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + cudaMemcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + 
cudaMemcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + } else { + // Source is on host - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t)); + std::memcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t)); + std::memcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t)); + } + + op_problem.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + } + + return op_problem; +} + +template +optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings, + bool problem_checking, + bool use_pdlp_solver_mode) +{ + // Initialize logger for this overload (needed for early returns) + init_logger_t log(settings.log_file, settings.log_to_console); + + // Check if remote solve is enabled + auto remote_config = get_remote_solve_config(); + if (remote_config.has_value()) { + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server + CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + handle_ptr->get_stream()); + } + + // Local solve: convert data_model_view_t to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); +} + +#define INSTANTIATE(F_TYPE) \ + template 
optimization_problem_solution_t solve_lp( \ + optimization_problem_t& op_problem, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode, \ + bool is_batch_mode); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ + template optimization_problem_solution_t solve_lp_with_method( \ + detail::problem_t& problem, \ + pdlp_solver_settings_t const& settings, \ + const timer_t& timer, \ + bool is_batch_mode); \ + \ + template optimization_problem_t mps_data_model_to_optimization_problem( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& data_model); \ + \ + template optimization_problem_t data_model_view_to_optimization_problem( \ + raft::handle_t const* handle_ptr, const data_model_view_t& view); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const data_model_view_t& view, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ template void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index e6a392d40..a390c1be0 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -293,16 +293,134 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } } +// Helper to create a data_model_view_t from mps_data_model_t (for remote solve path) +template +static data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + 
view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + 
view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + return view; +} + template mip_solution_t solve_mip( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, mip_solver_settings_t const& settings) { + // Check if remote solve is configured - if so, use the view-based path + if (is_remote_solve_enabled()) { + auto view = create_view_from_mps_data_model(mps_data_model); + return solve_mip(handle_ptr, view, settings); + } + + // Local solve: convert directly to optimization_problem_t (no extra copy) auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); return solve_mip(op_problem, settings); } +template +mip_solution_t solve_mip(raft::handle_t const* handle_ptr, + const data_model_view_t& view, + mip_solver_settings_t const& settings) +{ + // Initialize logger for this overload (needed for early returns) + init_logger_t log(settings.log_file, settings.log_to_console); + + // Check if remote solve is enabled + auto remote_config = get_remote_solve_config(); + if (remote_config.has_value()) { + CUOPT_LOG_INFO("[solve_mip] Remote solve 
detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server + CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); + return mip_solution_t( + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError), + handle_ptr->get_stream()); + } + + // Local solve: convert data_model_view_t to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_mip(op_problem, settings); +} + #define INSTANTIATE(F_TYPE) \ template mip_solution_t solve_mip( \ optimization_problem_t& op_problem, \ @@ -311,6 +429,11 @@ mip_solution_t solve_mip( template mip_solution_t solve_mip( \ raft::handle_t const* handle_ptr, \ const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ + mip_solver_settings_t const& settings); \ + \ + template mip_solution_t solve_mip( \ + raft::handle_t const* handle_ptr, \ + const data_model_view_t& view, \ mip_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT From b51d56113e7bfb614111d67663af971197e04402 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Wed, 24 Dec 2025 17:18:08 -0500 Subject: [PATCH 03/37] Unified data_model_view_t interface for local/remote solve - Add is_device_memory field to data_model_view_t to track memory location - Simplify solve_lp/solve_mip overloads: mps_data_model_t now creates view and delegates - C API checks remote config at creation time: GPU memory for local, CPU for remote - Handle edge case: GPU data + remote config copies to CPU with warning - Add white-box tests for all local/remote + CPU/GPU combinations --- .../include/mps_parser/data_model_view.hpp | 16 ++ cpp/src/linear_programming/cuopt_c.cpp | 185 +++++++++--- cpp/src/linear_programming/solve.cu | 159 ++++++++++- cpp/src/mip/solve.cu | 161 ++++++++++- cpp/tests/linear_programming/CMakeLists.txt | 1 + 
.../unit_tests/remote_solve_test.cu | 267 ++++++++++++++++++ 6 files changed, 721 insertions(+), 68 deletions(-) create mode 100644 cpp/tests/linear_programming/unit_tests/remote_solve_test.cu diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index eb34682ce..3f340ae96 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -406,8 +406,24 @@ class data_model_view_t { */ bool has_quadratic_objective() const noexcept; + /** + * @brief Set whether the data pointed to by this view is in device (GPU) memory. + * @note Default is false (CPU memory). Set to true when view points to GPU buffers. + * + * @param is_device true if data is in GPU memory, false if in CPU memory + */ + void set_is_device_memory(bool is_device) noexcept { is_device_memory_ = is_device; } + + /** + * @brief Check if the data pointed to by this view is in device (GPU) memory. 
+ * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const noexcept { return is_device_memory_; } + private: bool maximize_{false}; + bool is_device_memory_{false}; // true if spans point to GPU memory, false for CPU span A_; span A_indices_; span A_offsets_; diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index f40f5928c..b2dc0101b 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -147,8 +147,7 @@ struct problem_cpu_data_t { struct problem_and_stream_view_t { problem_and_stream_view_t() - : is_remote_configured(false), - cpu_data(nullptr), + : cpu_data(nullptr), gpu_problem(nullptr), stream_view(rmm::cuda_stream_per_thread), handle(stream_view) @@ -162,25 +161,88 @@ struct problem_and_stream_view_t { */ bool is_mip() const { - if (is_remote_configured) { - return cpu_data && cpu_data->is_mip(); - } else { + if (view.is_device_memory()) { + // GPU path: check gpu_problem's problem category if (!gpu_problem) return false; auto cat = gpu_problem->get_problem_category(); return (cat == problem_category_t::MIP) || (cat == problem_category_t::IP); + } else { + // CPU path: check variable types in cpu_data + if (!cpu_data) return false; + return cpu_data->is_mip(); } } - // Remote solve configured at creation time - bool is_remote_configured; - // Only ONE of these is allocated (optimized memory usage): std::unique_ptr cpu_data; // for remote solve (CPU memory) std::unique_ptr> gpu_problem; // for local solve (GPU memory) + // Non-owning view pointing to whichever storage is active + // Use view.is_device_memory() to check if data is on GPU or CPU + cuopt::linear_programming::data_model_view_t view; + rmm::cuda_stream_view stream_view; raft::handle_t handle; + + /** + * @brief Create a view pointing to GPU data from the gpu_problem. + * Call this after gpu_problem is fully populated. 
+ */ + void create_view_from_gpu_problem() + { + if (!gpu_problem) return; + auto& gpu = *gpu_problem; + + view.set_maximize(gpu.get_sense()); + view.set_objective_offset(gpu.get_objective_offset()); + view.set_objective_coefficients(gpu.get_objective_coefficients().data(), gpu.get_n_variables()); + view.set_csr_constraint_matrix(gpu.get_constraint_matrix_values().data(), + gpu.get_constraint_matrix_values().size(), + gpu.get_constraint_matrix_indices().data(), + gpu.get_constraint_matrix_indices().size(), + gpu.get_constraint_matrix_offsets().data(), + gpu.get_constraint_matrix_offsets().size()); + + if (!gpu.get_constraint_lower_bounds().is_empty()) { + view.set_constraint_lower_bounds(gpu.get_constraint_lower_bounds().data(), + gpu.get_n_constraints()); + view.set_constraint_upper_bounds(gpu.get_constraint_upper_bounds().data(), + gpu.get_n_constraints()); + } else if (!gpu.get_row_types().is_empty()) { + view.set_row_types(gpu.get_row_types().data(), gpu.get_n_constraints()); + view.set_constraint_bounds(gpu.get_constraint_bounds().data(), gpu.get_n_constraints()); + } + + view.set_variable_lower_bounds(gpu.get_variable_lower_bounds().data(), gpu.get_n_variables()); + view.set_variable_upper_bounds(gpu.get_variable_upper_bounds().data(), gpu.get_n_variables()); + + // Note: variable_types in optimization_problem_t uses var_t enum, not char + // The view's variable_types span will point to GPU memory with var_t values + // This is handled specially in solve routines + + if (gpu.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(gpu.get_quadratic_objective_values().data(), + gpu.get_quadratic_objective_values().size(), + gpu.get_quadratic_objective_indices().data(), + gpu.get_quadratic_objective_indices().size(), + gpu.get_quadratic_objective_offsets().data(), + gpu.get_quadratic_objective_offsets().size()); + } + + view.set_is_device_memory(true); + } + + /** + * @brief Create a view pointing to CPU data from cpu_data. 
+ * Call this after cpu_data is fully populated. + */ + void create_view_from_cpu_data() + { + if (!cpu_data) return; + view = cpu_data->create_view(); + view.set_is_device_memory(false); + } }; struct solution_and_stream_view_t { @@ -235,9 +297,9 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro } // Check remote solve configuration at creation time - problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + bool is_remote = is_remote_solve_enabled(); - if (problem_and_stream->is_remote_configured) { + if (is_remote) { // Remote: store in CPU memory problem_and_stream->cpu_data = std::make_unique(); auto& cpu_data = *problem_and_stream->cpu_data; @@ -277,12 +339,17 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro cpu_data.variable_types[i] = (mps_var_types[i] == 'I' || mps_var_types[i] == 'B') ? CUOPT_INTEGER : CUOPT_CONTINUOUS; } + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); } else { // Local: store in GPU memory using existing mps_data_model_to_optimization_problem problem_and_stream->gpu_problem = std::make_unique>( mps_data_model_to_optimization_problem(problem_and_stream->get_handle_ptr(), *mps_data_model_ptr)); + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } *problem_ptr = static_cast(problem_and_stream); @@ -315,12 +382,12 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + bool is_remote = is_remote_solve_enabled(); try { cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - if (problem_and_stream->is_remote_configured) { + if (is_remote) { // Remote: store in CPU memory problem_and_stream->cpu_data = std::make_unique(); auto& cpu_data = *problem_and_stream->cpu_data; @@ -346,6 +413,9 @@ cuopt_int_t 
cuOptCreateProblem(cuopt_int_t num_constraints, cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); } else { // Local: store in GPU memory problem_and_stream->gpu_problem = @@ -374,6 +444,9 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? var_t::CONTINUOUS : var_t::INTEGER; } gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } *problem_ptr = static_cast(problem_and_stream); @@ -410,12 +483,12 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + bool is_remote = is_remote_solve_enabled(); try { cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - if (problem_and_stream->is_remote_configured) { + if (is_remote) { // Remote: store in CPU memory problem_and_stream->cpu_data = std::make_unique(); auto& cpu_data = *problem_and_stream->cpu_data; @@ -445,6 +518,9 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, cpu_data.variable_upper_bounds.assign(variable_upper_bounds, variable_upper_bounds + num_variables); cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); } else { // Local: store in GPU memory problem_and_stream->gpu_problem = @@ -472,6 +548,9 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; } gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } *problem_ptr = static_cast(problem_and_stream); @@ -513,13 +592,13 @@ cuopt_int_t cuOptCreateQuadraticProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + bool is_remote = is_remote_solve_enabled(); try { cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - if (problem_and_stream->is_remote_configured) { + if (is_remote) { // Remote: store in CPU memory problem_and_stream->cpu_data = std::make_unique(); auto& cpu_data = *problem_and_stream->cpu_data; @@ -553,6 +632,9 @@ cuopt_int_t cuOptCreateQuadraticProblem( cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); } else { // Local: store in GPU memory problem_and_stream->gpu_problem = @@ -579,6 +661,9 @@ cuopt_int_t cuOptCreateQuadraticProblem( gpu_problem.set_constraint_bounds(rhs, num_constraints); gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } *problem_ptr = static_cast(problem_and_stream); @@ -621,13 +706,13 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->is_remote_configured = is_remote_solve_enabled(); + bool is_remote = is_remote_solve_enabled(); try { 
cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - if (problem_and_stream->is_remote_configured) { + if (is_remote) { // Remote: store in CPU memory problem_and_stream->cpu_data = std::make_unique(); auto& cpu_data = *problem_and_stream->cpu_data; @@ -665,6 +750,9 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( cpu_data.variable_upper_bounds.assign(variable_upper_bounds, variable_upper_bounds + num_variables); cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); } else { // Local: store in GPU memory problem_and_stream->gpu_problem = @@ -691,6 +779,9 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } *problem_ptr = static_cast(problem_and_stream); @@ -718,7 +809,7 @@ cuopt_int_t cuOptGetNumConstraints(cuOptOptimizationProblem problem, if (num_constraints_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { *num_constraints_ptr = problem_and_stream_view->cpu_data->num_constraints; } else { *num_constraints_ptr = problem_and_stream_view->gpu_problem->get_n_constraints(); @@ -732,7 +823,7 @@ cuopt_int_t cuOptGetNumVariables(cuOptOptimizationProblem problem, cuopt_int_t* if (num_variables_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { 
+ if (!problem_and_stream_view->view.is_device_memory()) { *num_variables_ptr = problem_and_stream_view->cpu_data->num_variables; } else { *num_variables_ptr = problem_and_stream_view->gpu_problem->get_n_variables(); @@ -747,7 +838,7 @@ cuopt_int_t cuOptGetObjectiveSense(cuOptOptimizationProblem problem, if (objective_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { *objective_sense_ptr = problem_and_stream_view->cpu_data->maximize ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; } else { @@ -764,7 +855,7 @@ cuopt_int_t cuOptGetObjectiveOffset(cuOptOptimizationProblem problem, if (objective_offset_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { *objective_offset_ptr = problem_and_stream_view->cpu_data->objective_offset; } else { *objective_offset_ptr = problem_and_stream_view->gpu_problem->get_objective_offset(); @@ -779,7 +870,7 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem, if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& coeffs = problem_and_stream_view->cpu_data->objective_coefficients; std::copy(coeffs.begin(), coeffs.end(), objective_coefficients_ptr); } else { @@ -800,7 +891,7 @@ cuopt_int_t cuOptGetNumNonZeros(cuOptOptimizationProblem problem, if (num_non_zero_elements_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if 
(problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { *num_non_zero_elements_ptr = static_cast(problem_and_stream_view->cpu_data->A_values.size()); } else { @@ -822,7 +913,7 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& cpu_data = *problem_and_stream_view->cpu_data; std::copy( cpu_data.A_values.begin(), cpu_data.A_values.end(), constraint_matrix_coefficients_ptr); @@ -857,7 +948,7 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& row_types = problem_and_stream_view->cpu_data->row_types; std::copy(row_types.begin(), row_types.end(), constraint_sense_ptr); } else { @@ -879,7 +970,7 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& bounds = problem_and_stream_view->cpu_data->constraint_bounds; std::copy(bounds.begin(), bounds.end(), rhs_ptr); } else { @@ -901,7 +992,7 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& bounds = problem_and_stream_view->cpu_data->constraint_lower_bounds; std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); } else { @@ -923,7 +1014,7 @@ cuopt_int_t 
cuOptGetConstraintUpperBounds(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& bounds = problem_and_stream_view->cpu_data->constraint_upper_bounds; std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); } else { @@ -945,7 +1036,7 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& bounds = problem_and_stream_view->cpu_data->variable_lower_bounds; std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); } else { @@ -967,7 +1058,7 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem, problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& bounds = problem_and_stream_view->cpu_data->variable_upper_bounds; std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); } else { @@ -988,7 +1079,7 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->is_remote_configured) { + if (!problem_and_stream_view->view.is_device_memory()) { const auto& var_types = problem_and_stream_view->cpu_data->variable_types; std::copy(var_types.begin(), var_types.end(), variable_types_ptr); } else { @@ -1171,9 +1262,13 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, bool is_mip = problem_and_stream_view->is_mip(); - if (problem_and_stream_view->is_remote_configured) { - // Remote path: use data_model_view_t from CPU data - auto view = 
problem_and_stream_view->cpu_data->create_view(); + // Use the view - solve_lp/solve_mip will check is_device_memory() to determine path + const auto& view = problem_and_stream_view->view; + + if (view.is_device_memory()) { + // Local path: data is already on GPU + // Use gpu_problem directly for optimal performance (no extra copy) + auto& gpu_problem = *problem_and_stream_view->gpu_problem; if (is_mip) { mip_solver_settings_t& mip_settings = @@ -1182,9 +1277,8 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solution_and_stream_view_t* solution_and_stream_view = new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); - solution_and_stream_view->mip_solution_ptr = - new mip_solution_t(solve_mip( - problem_and_stream_view->get_handle_ptr(), view, mip_settings)); + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(gpu_problem, mip_settings)); *solution_ptr = static_cast(solution_and_stream_view); @@ -1201,8 +1295,7 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solution_and_stream_view->lp_solution_ptr = new optimization_problem_solution_t( - solve_lp( - problem_and_stream_view->get_handle_ptr(), view, pdlp_settings)); + solve_lp(gpu_problem, pdlp_settings)); *solution_ptr = static_cast(solution_and_stream_view); @@ -1212,9 +1305,7 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); } } else { - // Local path: use GPU problem directly (already in GPU memory) - auto& gpu_problem = *problem_and_stream_view->gpu_problem; - + // CPU path: use view directly - solve_lp/solve_mip handle remote vs local conversion if (is_mip) { mip_solver_settings_t& mip_settings = solver_settings->get_mip_settings(); @@ -1222,8 +1313,9 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solution_and_stream_view_t* solution_and_stream_view = new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); - 
solution_and_stream_view->mip_solution_ptr = new mip_solution_t( - solve_mip(gpu_problem, mip_settings)); + solution_and_stream_view->mip_solution_ptr = + new mip_solution_t(solve_mip( + problem_and_stream_view->get_handle_ptr(), view, mip_settings)); *solution_ptr = static_cast(solution_and_stream_view); @@ -1240,7 +1332,8 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solution_and_stream_view->lp_solution_ptr = new optimization_problem_solution_t( - solve_lp(gpu_problem, pdlp_settings)); + solve_lp( + problem_and_stream_view->get_handle_ptr(), view, pdlp_settings)); *solution_ptr = static_cast(solution_and_stream_view); diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 43e14015f..4e5c9c6a0 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -1134,15 +1134,11 @@ optimization_problem_solution_t solve_lp( bool problem_checking, bool use_pdlp_solver_mode) { - // Check if remote solve is configured - if so, use the view-based path - if (is_remote_solve_enabled()) { - auto view = create_view_from_mps_data_model(mps_data_model); - return solve_lp(handle_ptr, view, settings, problem_checking, use_pdlp_solver_mode); - } - - // Local solve: convert directly to optimization_problem_t (no extra copy) - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); - return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); + // Create a view pointing to CPU data and delegate to the view-based overload. + // The view overload handles local vs remote solve automatically. 
+ auto view = create_view_from_mps_data_model(mps_data_model); + view.set_is_device_memory(false); // MPS data is always in CPU memory + return solve_lp(handle_ptr, view, settings, problem_checking, use_pdlp_solver_mode); } template @@ -1277,6 +1273,122 @@ optimization_problem_t data_model_view_to_optimization_problem( return op_problem; } +// Helper struct to hold CPU copies of GPU data for remote solve +template +struct cpu_problem_data_t { + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + std::vector constraint_bounds; + std::vector constraint_lower_bounds; + std::vector constraint_upper_bounds; + std::vector objective_coefficients; + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + std::vector variable_types; + std::vector quadratic_objective_values; + std::vector quadratic_objective_indices; + std::vector quadratic_objective_offsets; + bool maximize; + f_t objective_scaling_factor; + f_t objective_offset; + + data_model_view_t create_view() const + { + data_model_view_t v; + v.set_maximize(maximize); + v.set_objective_scaling_factor(objective_scaling_factor); + v.set_objective_offset(objective_offset); + + if (!A_values.empty()) { + v.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + if (!constraint_bounds.empty()) { + v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) { + v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size()); + v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size()); + } + if (!objective_coefficients.empty()) { + v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + if (!variable_lower_bounds.empty()) { + v.set_variable_lower_bounds(variable_lower_bounds.data(), 
variable_lower_bounds.size()); + } + if (!variable_upper_bounds.empty()) { + v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + if (!variable_types.empty()) { + v.set_variable_types(variable_types.data(), variable_types.size()); + } + if (!quadratic_objective_values.empty()) { + v.set_quadratic_objective_matrix(quadratic_objective_values.data(), + quadratic_objective_values.size(), + quadratic_objective_indices.data(), + quadratic_objective_indices.size(), + quadratic_objective_offsets.data(), + quadratic_objective_offsets.size()); + } + v.set_is_device_memory(false); + return v; + } +}; + +// Helper to copy GPU view data to CPU +template +cpu_problem_data_t copy_view_to_cpu(raft::handle_t const* handle_ptr, + const data_model_view_t& gpu_view) +{ + cpu_problem_data_t cpu_data; + auto stream = handle_ptr->get_stream(); + + cpu_data.maximize = gpu_view.get_sense(); + cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor(); + cpu_data.objective_offset = gpu_view.get_objective_offset(); + + auto copy_to_host = [stream](auto& dst_vec, auto src_span) { + if (src_span.size() > 0) { + dst_vec.resize(src_span.size()); + raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream); + } + }; + + copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values()); + copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices()); + copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets()); + copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds()); + copy_to_host(cpu_data.constraint_lower_bounds, gpu_view.get_constraint_lower_bounds()); + copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds()); + copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients()); + copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds()); + copy_to_host(cpu_data.variable_upper_bounds, 
gpu_view.get_variable_upper_bounds()); + copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values()); + copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices()); + copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets()); + + // Variable types need special handling (char array) + auto var_types_span = gpu_view.get_variable_types(); + if (var_types_span.size() > 0) { + cpu_data.variable_types.resize(var_types_span.size()); + cudaMemcpyAsync(cpu_data.variable_types.data(), + var_types_span.data(), + var_types_span.size() * sizeof(char), + cudaMemcpyDeviceToHost, + stream); + } + + // Synchronize to ensure all copies are complete + cudaStreamSynchronize(stream); + + return cpu_data; +} + template optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ptr, const data_model_view_t& view, @@ -1287,8 +1399,33 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ // Initialize logger for this overload (needed for early returns) init_logger_t log(settings.log_file, settings.log_to_console); - // Check if remote solve is enabled + // Check for remote solve configuration first auto remote_config = get_remote_solve_config(); + + if (view.is_device_memory()) { + if (remote_config.has_value()) { + // GPU data + remote solve requested: copy to CPU and proceed with remote + CUOPT_LOG_WARN( + "[solve_lp] Remote solve requested but data is on GPU. 
" + "Copying to CPU for serialization (performance impact)."); + auto cpu_data = copy_view_to_cpu(handle_ptr, view); + auto cpu_view = cpu_data.create_view(); + + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // TODO: Implement remote solve - serialize cpu_view and send to remote server + CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, + handle_ptr->get_stream()); + } + + // Local solve: data already on GPU - convert view to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); + } + + // Data is on CPU if (remote_config.has_value()) { CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), @@ -1299,7 +1436,7 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ handle_ptr->get_stream()); } - // Local solve: convert data_model_view_t to optimization_problem_t and solve + // Local solve with CPU data: copy to GPU and solve auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index a390c1be0..041cd0620 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -378,21 +378,133 @@ static data_model_view_t create_view_from_mps_data_model( return view; } +// Helper struct to hold CPU copies of GPU data for remote solve +template +struct cpu_problem_data_t { + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + std::vector constraint_bounds; + std::vector constraint_lower_bounds; + std::vector constraint_upper_bounds; + std::vector objective_coefficients; + 
std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + std::vector variable_types; + std::vector quadratic_objective_values; + std::vector quadratic_objective_indices; + std::vector quadratic_objective_offsets; + bool maximize; + f_t objective_scaling_factor; + f_t objective_offset; + + data_model_view_t create_view() const + { + data_model_view_t v; + v.set_maximize(maximize); + v.set_objective_scaling_factor(objective_scaling_factor); + v.set_objective_offset(objective_offset); + + if (!A_values.empty()) { + v.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + if (!constraint_bounds.empty()) { + v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) { + v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size()); + v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size()); + } + if (!objective_coefficients.empty()) { + v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + if (!variable_lower_bounds.empty()) { + v.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + if (!variable_upper_bounds.empty()) { + v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + if (!variable_types.empty()) { + v.set_variable_types(variable_types.data(), variable_types.size()); + } + if (!quadratic_objective_values.empty()) { + v.set_quadratic_objective_matrix(quadratic_objective_values.data(), + quadratic_objective_values.size(), + quadratic_objective_indices.data(), + quadratic_objective_indices.size(), + quadratic_objective_offsets.data(), + quadratic_objective_offsets.size()); + } + v.set_is_device_memory(false); + return v; + } +}; + +// Helper to copy GPU view data to CPU 
+template +cpu_problem_data_t copy_view_to_cpu(raft::handle_t const* handle_ptr, + const data_model_view_t& gpu_view) +{ + cpu_problem_data_t cpu_data; + auto stream = handle_ptr->get_stream(); + + cpu_data.maximize = gpu_view.get_sense(); + cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor(); + cpu_data.objective_offset = gpu_view.get_objective_offset(); + + auto copy_to_host = [stream](auto& dst_vec, auto src_span) { + if (src_span.size() > 0) { + dst_vec.resize(src_span.size()); + raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream); + } + }; + + copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values()); + copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices()); + copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets()); + copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds()); + copy_to_host(cpu_data.constraint_lower_bounds, gpu_view.get_constraint_lower_bounds()); + copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds()); + copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients()); + copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds()); + copy_to_host(cpu_data.variable_upper_bounds, gpu_view.get_variable_upper_bounds()); + copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values()); + copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices()); + copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets()); + + // Variable types need special handling (char array) + auto var_types_span = gpu_view.get_variable_types(); + if (var_types_span.size() > 0) { + cpu_data.variable_types.resize(var_types_span.size()); + cudaMemcpyAsync(cpu_data.variable_types.data(), + var_types_span.data(), + var_types_span.size() * sizeof(char), + cudaMemcpyDeviceToHost, + stream); + } + + 
// Synchronize to ensure all copies are complete + cudaStreamSynchronize(stream); + + return cpu_data; +} + template mip_solution_t solve_mip( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& mps_data_model, mip_solver_settings_t const& settings) { - // Check if remote solve is configured - if so, use the view-based path - if (is_remote_solve_enabled()) { - auto view = create_view_from_mps_data_model(mps_data_model); - return solve_mip(handle_ptr, view, settings); - } - - // Local solve: convert directly to optimization_problem_t (no extra copy) - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); - return solve_mip(op_problem, settings); + // Create a view pointing to CPU data and delegate to the view-based overload. + // The view overload handles local vs remote solve automatically. + auto view = create_view_from_mps_data_model(mps_data_model); + view.set_is_device_memory(false); // MPS data is always in CPU memory + return solve_mip(handle_ptr, view, settings); } template @@ -403,8 +515,35 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, // Initialize logger for this overload (needed for early returns) init_logger_t log(settings.log_file, settings.log_to_console); - // Check if remote solve is enabled + // Check for remote solve configuration first auto remote_config = get_remote_solve_config(); + + if (view.is_device_memory()) { + if (remote_config.has_value()) { + // GPU data + remote solve requested: copy to CPU and proceed with remote + CUOPT_LOG_WARN( + "[solve_mip] Remote solve requested but data is on GPU. 
" + "Copying to CPU for serialization (performance impact)."); + auto cpu_data = copy_view_to_cpu(handle_ptr, view); + auto cpu_view = cpu_data.create_view(); + + CUOPT_LOG_INFO( + "[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // TODO: Implement remote solve - serialize cpu_view and send to remote server + CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); + return mip_solution_t( + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError), + handle_ptr->get_stream()); + } + + // Local solve: data already on GPU - convert view to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_mip(op_problem, settings); + } + + // Data is on CPU if (remote_config.has_value()) { CUOPT_LOG_INFO("[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), @@ -416,7 +555,7 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, handle_ptr->get_stream()); } - // Local solve: convert data_model_view_t to optimization_problem_t and solve + // Local solve with CPU data: copy to GPU and solve auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); return solve_mip(op_problem, settings); } diff --git a/cpp/tests/linear_programming/CMakeLists.txt b/cpp/tests/linear_programming/CMakeLists.txt index c091751f9..11ea6fd1f 100644 --- a/cpp/tests/linear_programming/CMakeLists.txt +++ b/cpp/tests/linear_programming/CMakeLists.txt @@ -6,6 +6,7 @@ ConfigureTest(LP_UNIT_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/optimization_problem_test.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/solver_settings_test.cu + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/remote_solve_test.cu )# ################################################################################################## # - Linear programming PDLP tests 
---------------------------------------------------------------------- ConfigureTest(PDLP_TEST diff --git a/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu b/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu new file mode 100644 index 000000000..08cfa7342 --- /dev/null +++ b/cpp/tests/linear_programming/unit_tests/remote_solve_test.cu @@ -0,0 +1,267 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace cuopt::linear_programming { + +// Test fixture that manages environment variables +class RemoteSolveTest : public ::testing::Test { + protected: + void SetUp() override + { + // Save original env vars if they exist + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + original_host_ = host ? host : ""; + original_port_ = port ? 
port : ""; + host_was_set_ = (host != nullptr); + port_was_set_ = (port != nullptr); + } + + void TearDown() override + { + // Restore original env vars + if (host_was_set_) { + setenv("CUOPT_REMOTE_HOST", original_host_.c_str(), 1); + } else { + unsetenv("CUOPT_REMOTE_HOST"); + } + if (port_was_set_) { + setenv("CUOPT_REMOTE_PORT", original_port_.c_str(), 1); + } else { + unsetenv("CUOPT_REMOTE_PORT"); + } + } + + void enable_remote_solve() + { + setenv("CUOPT_REMOTE_HOST", "localhost", 1); + setenv("CUOPT_REMOTE_PORT", "5000", 1); + } + + void disable_remote_solve() + { + unsetenv("CUOPT_REMOTE_HOST"); + unsetenv("CUOPT_REMOTE_PORT"); + } + + private: + std::string original_host_; + std::string original_port_; + bool host_was_set_; + bool port_was_set_; +}; + +// White-box test: GPU data + remote solve enabled +// This tests the edge case where data is on GPU but user wants remote solve +TEST_F(RemoteSolveTest, gpu_data_with_remote_solve_enabled) +{ + const raft::handle_t handle_{}; + + // Create a simple 2x2 LP problem directly on GPU + // minimize: 0.2*x1 + 0.1*x2 + // subject to: 3*x1 + 4*x2 <= 5.4 + // 2.7*x1 + 10.1*x2 <= 4.9 + // x1, x2 >= 0 + + // Allocate GPU memory for problem data + rmm::device_uvector A_values(4, handle_.get_stream()); + rmm::device_uvector A_indices(4, handle_.get_stream()); + rmm::device_uvector A_offsets(3, handle_.get_stream()); + rmm::device_uvector constraint_bounds(2, handle_.get_stream()); + rmm::device_uvector objective_coeffs(2, handle_.get_stream()); + rmm::device_uvector var_lower(2, handle_.get_stream()); + rmm::device_uvector var_upper(2, handle_.get_stream()); + + // Host data + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_bounds = {5.4, 4.9}; + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + // Copy to GPU + raft::copy(A_values.data(), h_A_values.data(), 
4, handle_.get_stream()); + raft::copy(A_indices.data(), h_A_indices.data(), 4, handle_.get_stream()); + raft::copy(A_offsets.data(), h_A_offsets.data(), 3, handle_.get_stream()); + raft::copy(constraint_bounds.data(), h_bounds.data(), 2, handle_.get_stream()); + raft::copy(objective_coeffs.data(), h_obj.data(), 2, handle_.get_stream()); + raft::copy(var_lower.data(), h_var_lower.data(), 2, handle_.get_stream()); + raft::copy(var_upper.data(), h_var_upper.data(), 2, handle_.get_stream()); + handle_.sync_stream(); + + // Create a data_model_view_t pointing to GPU data + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix(A_values.data(), 4, A_indices.data(), 4, A_offsets.data(), 3); + view.set_constraint_bounds(constraint_bounds.data(), 2); + view.set_objective_coefficients(objective_coeffs.data(), 2); + view.set_variable_lower_bounds(var_lower.data(), 2); + view.set_variable_upper_bounds(var_upper.data(), 2); + view.set_is_device_memory(true); // Mark as GPU data + + // Enable remote solve + enable_remote_solve(); + + // Verify remote solve is enabled + ASSERT_TRUE(is_remote_solve_enabled()); + + // Call solve_lp with GPU view + remote solve enabled + // This should trigger the GPU->CPU copy path and return the "not implemented" error + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + // Since remote solve is not yet implemented, we expect an error status + // The key thing is that we didn't crash and the GPU->CPU copy worked + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::NumericalError); +} + +// Control test: GPU data + local solve (no remote) +TEST_F(RemoteSolveTest, gpu_data_with_local_solve) +{ + const raft::handle_t handle_{}; + + // Same problem setup as above + rmm::device_uvector A_values(4, handle_.get_stream()); + rmm::device_uvector A_indices(4, handle_.get_stream()); + rmm::device_uvector A_offsets(3, handle_.get_stream()); + rmm::device_uvector 
constraint_lower(2, handle_.get_stream()); + rmm::device_uvector constraint_upper(2, handle_.get_stream()); + rmm::device_uvector objective_coeffs(2, handle_.get_stream()); + rmm::device_uvector var_lower(2, handle_.get_stream()); + rmm::device_uvector var_upper(2, handle_.get_stream()); + + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_constraint_lower = {-1e20, -1e20}; // -inf (no lower bound) + std::vector h_constraint_upper = {5.4, 4.9}; // <= constraints + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + raft::copy(A_values.data(), h_A_values.data(), 4, handle_.get_stream()); + raft::copy(A_indices.data(), h_A_indices.data(), 4, handle_.get_stream()); + raft::copy(A_offsets.data(), h_A_offsets.data(), 3, handle_.get_stream()); + raft::copy(constraint_lower.data(), h_constraint_lower.data(), 2, handle_.get_stream()); + raft::copy(constraint_upper.data(), h_constraint_upper.data(), 2, handle_.get_stream()); + raft::copy(objective_coeffs.data(), h_obj.data(), 2, handle_.get_stream()); + raft::copy(var_lower.data(), h_var_lower.data(), 2, handle_.get_stream()); + raft::copy(var_upper.data(), h_var_upper.data(), 2, handle_.get_stream()); + handle_.sync_stream(); + + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix(A_values.data(), 4, A_indices.data(), 4, A_offsets.data(), 3); + view.set_constraint_lower_bounds(constraint_lower.data(), 2); + view.set_constraint_upper_bounds(constraint_upper.data(), 2); + view.set_objective_coefficients(objective_coeffs.data(), 2); + view.set_variable_lower_bounds(var_lower.data(), 2); + view.set_variable_upper_bounds(var_upper.data(), 2); + view.set_is_device_memory(true); + + // Disable remote solve + disable_remote_solve(); + + // Verify remote solve is disabled + ASSERT_FALSE(is_remote_solve_enabled()); + + // Call solve_lp - 
should solve locally + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + // Should succeed with optimal status + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::Optimal); +} + +// Test: CPU data + remote solve enabled +TEST_F(RemoteSolveTest, cpu_data_with_remote_solve_enabled) +{ + const raft::handle_t handle_{}; + + // Host data (CPU) + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_bounds = {5.4, 4.9}; + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = {1e20, 1e20}; + + // Create view pointing to CPU data + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix( + h_A_values.data(), 4, h_A_indices.data(), 4, h_A_offsets.data(), 3); + view.set_constraint_bounds(h_bounds.data(), 2); + view.set_objective_coefficients(h_obj.data(), 2); + view.set_variable_lower_bounds(h_var_lower.data(), 2); + view.set_variable_upper_bounds(h_var_upper.data(), 2); + view.set_is_device_memory(false); // CPU data + + // Enable remote solve + enable_remote_solve(); + ASSERT_TRUE(is_remote_solve_enabled()); + + // Should go to remote path (and return not implemented error) + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::NumericalError); +} + +// Test: CPU data + local solve +TEST_F(RemoteSolveTest, cpu_data_with_local_solve) +{ + const raft::handle_t handle_{}; + + std::vector h_A_values = {3.0, 4.0, 2.7, 10.1}; + std::vector h_A_indices = {0, 1, 0, 1}; + std::vector h_A_offsets = {0, 2, 4}; + std::vector h_constraint_lower = {-1e20, -1e20}; // -inf (no lower bound) + std::vector h_constraint_upper = {5.4, 4.9}; // <= constraints + std::vector h_obj = {0.2, 0.1}; + std::vector h_var_lower = {0.0, 0.0}; + std::vector h_var_upper = 
{1e20, 1e20}; + + data_model_view_t view; + view.set_maximize(false); + view.set_csr_constraint_matrix( + h_A_values.data(), 4, h_A_indices.data(), 4, h_A_offsets.data(), 3); + view.set_constraint_lower_bounds(h_constraint_lower.data(), 2); + view.set_constraint_upper_bounds(h_constraint_upper.data(), 2); + view.set_objective_coefficients(h_obj.data(), 2); + view.set_variable_lower_bounds(h_var_lower.data(), 2); + view.set_variable_upper_bounds(h_var_upper.data(), 2); + view.set_is_device_memory(false); + + disable_remote_solve(); + ASSERT_FALSE(is_remote_solve_enabled()); + + // Should copy to GPU and solve locally + pdlp_solver_settings_t settings; + auto solution = solve_lp(&handle_, view, settings); + + EXPECT_EQ(solution.get_termination_status(), pdlp_termination_status_t::Optimal); +} + +} // namespace cuopt::linear_programming From 04aca2f0ded66faa619659f7f1921ca5c76a1cbf Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Fri, 2 Jan 2026 11:19:29 -0500 Subject: [PATCH 04/37] Add Python path support for transparent remote solve - Modify cython_solve.cu to check for remote solve configuration early - When remote is enabled, mark data_model as CPU memory and call solve_lp/solve_mip with data_model_view_t overloads - Fix segfault by initializing warm start data to empty device buffers instead of nullptr when returning from remote solve path - Local solve path unchanged - still creates GPU optimization_problem_t --- .../utilities/cython_solve.cu | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 0e1dbc6af..6d847298d 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -237,6 +238,93 @@ std::unique_ptr call_solve( // flags)); // flags=cudaStreamNonBlocking const raft::handle_t 
handle_{stream}; const raft::handle_t handle_{}; + // Check if remote solve is configured + if (linear_programming::is_remote_solve_enabled()) { + // Data coming from Python is in CPU memory - mark it as such + data_model->set_is_device_memory(false); + + solver_ret_t response; + + // Determine if LP or MIP based on variable types + bool is_mip = false; + auto var_types = data_model->get_variable_types(); + for (size_t i = 0; i < var_types.size(); ++i) { + if (var_types.data()[i] != 'C') { + is_mip = true; + break; + } + } + + if (!is_mip) { + // LP: call solve_lp with view - it will handle remote + auto solution = + linear_programming::solve_lp(&handle_, *data_model, solver_settings->get_pdlp_settings()); + + // Convert solution to linear_programming_ret_t + auto term_info = solution.get_additional_termination_information(); + linear_programming_ret_t lp_ret{ + std::make_unique(solution.get_primal_solution().release()), + std::make_unique(solution.get_dual_solution().release()), + std::make_unique(solution.get_reduced_cost().release()), + // Warm start data - create empty buffers to avoid null pointer issues in Python wrapper + std::make_unique(), // current_primal_solution + std::make_unique(), // current_dual_solution + std::make_unique(), // initial_primal_average + std::make_unique(), // initial_dual_average + std::make_unique(), // current_ATY + std::make_unique(), // sum_primal_solutions + std::make_unique(), // sum_dual_solutions + std::make_unique(), // last_restart_duality_gap_primal_solution + std::make_unique(), // last_restart_duality_gap_dual_solution + 0.0, // initial_primal_weight + 0.0, // initial_step_size + 0, // total_pdlp_iterations + 0, // total_pdhg_iterations + 0.0, // last_candidate_kkt_score + 0.0, // last_restart_kkt_score + 0.0, // sum_solution_weight + 0, // iterations_since_last_restart + solution.get_termination_status(), + solution.get_error_status().get_error_type(), + solution.get_error_status().what(), + 
term_info.l2_primal_residual, + term_info.l2_dual_residual, + term_info.primal_objective, + term_info.dual_objective, + term_info.gap, + term_info.number_of_steps_taken, + solution.get_solve_time(), + false // solved_by_pdlp + }; + response.lp_ret = std::move(lp_ret); + response.problem_type = linear_programming::problem_category_t::LP; + } else { + // MIP: call solve_mip with view - it will handle remote + auto solution = + linear_programming::solve_mip(&handle_, *data_model, solver_settings->get_mip_settings()); + + mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), + solution.get_termination_status(), + solution.get_error_status().get_error_type(), + solution.get_error_status().what(), + solution.get_objective_value(), + solution.get_mip_gap(), + solution.get_solution_bound(), + solution.get_total_solve_time(), + solution.get_presolve_time(), + solution.get_max_constraint_violation(), + solution.get_max_int_violation(), + solution.get_max_variable_bound_violation(), + solution.get_num_nodes(), + solution.get_num_simplex_iterations()}; + response.mip_ret = std::move(mip_ret); + response.problem_type = linear_programming::problem_category_t::MIP; + } + + return std::make_unique(std::move(response)); + } + + // Local solve: proceed as before - create GPU problem and solve auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); solver_ret_t response; if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) { From 05a7e40a0ddbd2f0d27757b7b6cb6c5dddfa7886 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Fri, 2 Jan 2026 14:05:00 -0500 Subject: [PATCH 05/37] Refactor solution objects to support CPU/GPU memory for transparent remote solve - Add is_device_memory flag to optimization_problem_solution_t and mip_solution_t - Use std::unique_ptr for both CPU (std::vector) and GPU (rmm::device_uvector) storage - Add _host() getters for C++ users to access CPU-resident solution data - Update 
cython_solve.cu to populate CPU or GPU storage based on solve type - Update Python wrapper to transparently handle both CPU and GPU solution data - Backward compatible: existing GPU-based code unchanged, Python users need no changes --- .../mip/solver_solution.hpp | 41 ++- .../pdlp/solver_solution.hpp | 89 +++++- .../utilities/cython_solve.hpp | 19 +- cpp/src/linear_programming/solver_solution.cu | 263 ++++++++++++++---- .../utilities/cython_solve.cu | 249 ++++++++++------- cpp/src/mip/solver_solution.cu | 103 ++++++- .../linear_programming/solver/solver.pxd | 14 +- .../solver/solver_wrapper.pyx | 42 ++- 8 files changed, 628 insertions(+), 192 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index 6ff8d324b..385b1e0b9 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -51,10 +52,38 @@ class mip_solution_t : public base_solution_t { rmm::cuda_stream_view stream_view); mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view); + // CPU-only constructors for remote solve + mip_solution_t(std::vector solution, + std::vector var_names, + f_t objective, + f_t mip_gap, + mip_termination_status_t termination_status, + f_t max_constraint_violation, + f_t max_int_violation, + f_t max_variable_bound_violation, + solver_stats_t stats); + + mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats); + mip_solution_t(const cuopt::logic_error& error_status); + bool is_mip() const override { return true; } + + /** + * @brief Check if solution data is stored in device (GPU) memory + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + const rmm::device_uvector& get_solution() const; rmm::device_uvector& get_solution(); + /** + * @brief Returns the solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. 
+ */ + std::vector& get_solution_host(); + const std::vector& get_solution_host() const; + f_t get_objective_value() const; f_t get_mip_gap() const; f_t get_solution_bound() const; @@ -76,7 +105,15 @@ class mip_solution_t : public base_solution_t { void log_summary() const; private: - rmm::device_uvector solution_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> solution_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + std::vector var_names_; f_t objective_; f_t mip_gap_; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index c5fe96ef1..1dbac6933 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -168,6 +169,42 @@ class optimization_problem_solution_t : public base_solution_t { const raft::handle_t* handler_ptr, bool deep_copy); + /** + * @brief Construct an optimization problem solution with CPU (host) memory storage. + * Used for remote solve scenarios where no GPU is available. 
+ * + * @param[in] primal_solution The primal solution in host memory + * @param[in] dual_solution The dual solution in host memory + * @param[in] reduced_cost The reduced cost in host memory + * @param[in] objective_name The objective name + * @param[in] var_names The variables names + * @param[in] row_names The rows name + * @param[in] termination_stats The termination statistics + * @param[in] termination_status The termination reason + */ + optimization_problem_solution_t(std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status); + + /** + * @brief Construct an empty solution for CPU-only scenarios (e.g., remote solve error) + * + * @param[in] termination_status Reason for termination + */ + optimization_problem_solution_t(pdlp_termination_status_t termination_status); + + /** + * @brief Construct an error solution for CPU-only scenarios + * + * @param[in] error_status The error object + */ + optimization_problem_solution_t(cuopt::logic_error error_status); + /** * @brief Set the solve time in seconds * @@ -234,6 +271,40 @@ class optimization_problem_solution_t : public base_solution_t { */ rmm::device_uvector& get_reduced_cost(); + /** + * @brief Check if solution data is stored in device (GPU) memory + * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + + /** + * @brief Returns the primal solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the primal solution. + */ + std::vector& get_primal_solution_host(); + const std::vector& get_primal_solution_host() const; + + /** + * @brief Returns the dual solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. 
+ * + * @return std::vector& The host memory container for the dual solution. + */ + std::vector& get_dual_solution_host(); + const std::vector& get_dual_solution_host() const; + + /** + * @brief Returns the reduced cost in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the reduced cost. + */ + std::vector& get_reduced_cost_host(); + const std::vector& get_reduced_cost_host() const; + /** * @brief Get termination reason * @return Termination reason @@ -285,9 +356,19 @@ class optimization_problem_solution_t : public base_solution_t { private: void write_additional_termination_statistics_to_file(std::ofstream& myfile); - rmm::device_uvector primal_solution_; - rmm::device_uvector dual_solution_; - rmm::device_uvector reduced_cost_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> primal_solution_; + std::unique_ptr> dual_solution_; + std::unique_ptr> reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> primal_solution_host_; + std::unique_ptr> dual_solution_host_; + std::unique_ptr> reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + pdlp_warm_start_data_t pdlp_warm_start_data_; pdlp_termination_status_t termination_status_; diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index e1a75747d..abe49a2be 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,9 +25,19 @@ namespace cython { // aggregate for call_solve() return type // to be exposed to cython: struct linear_programming_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr primal_solution_; std::unique_ptr dual_solution_; std::unique_ptr reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::vector primal_solution_host_; + std::vector dual_solution_host_; + std::vector reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + /* -- PDLP Warm Start Data -- */ std::unique_ptr current_primal_solution_; std::unique_ptr current_dual_solution_; @@ -64,8 +74,15 @@ struct linear_programming_ret_t { }; struct mip_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr solution_; + // CPU (host) storage - populated for remote solves + std::vector solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + linear_programming::mip_termination_status_t termination_status_; error_type_t error_status_; std::string error_message_; diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index 1409e7cbf..027003fb1 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -21,9 +21,10 @@ namespace cuopt::linear_programming { template optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { @@ -32,14 +33,41 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( cuopt::logic_error error_status_, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(pdlp_termination_status_t::NoTermination), error_status_(error_status_) { } +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>()), + dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(termination_status), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + cuopt::logic_error error_status) + : primal_solution_host_(std::make_unique>()), + 
dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(pdlp_termination_status_t::NoTermination), + error_status_(error_status) +{ +} + template optimization_problem_solution_t::optimization_problem_solution_t( rmm::device_uvector& final_primal_solution, @@ -51,15 +79,16 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), pdlp_warm_start_data_(std::move(warm_start_data)), + termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -74,14 +103,15 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), + 
termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -98,14 +128,42 @@ optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, const raft::handle_t* handler_ptr, [[maybe_unused]] bool deep_copy) - : primal_solution_(final_primal_solution, handler_ptr->get_stream()), - dual_solution_(final_dual_solution, handler_ptr->get_stream()), - reduced_cost_(final_reduced_cost, handler_ptr->get_stream()), + : primal_solution_( + std::make_unique>(final_primal_solution, handler_ptr->get_stream())), + dual_solution_( + std::make_unique>(final_dual_solution, handler_ptr->get_stream())), + reduced_cost_( + std::make_unique>(final_reduced_cost, handler_ptr->get_stream())), + is_device_memory_(true), + termination_status_(termination_status), + termination_stats_(termination_stats), objective_name_(objective_name), var_names_(var_names), row_names_(row_names), - termination_stats_(termination_stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve with solution data +template +optimization_problem_solution_t::optimization_problem_solution_t( + std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>(std::move(primal_solution))), + dual_solution_host_(std::make_unique>(std::move(dual_solution))), + reduced_cost_host_(std::make_unique>(std::move(reduced_cost))), + 
is_device_memory_(false), termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), + objective_name_(objective_name), + var_names_(var_names), + row_names_(row_names), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -114,31 +172,56 @@ template void optimization_problem_solution_t::copy_from( const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other) { - // Resize to make sure they are of same size - primal_solution_.resize(other.primal_solution_.size(), handle_ptr->get_stream()); - dual_solution_.resize(other.dual_solution_.size(), handle_ptr->get_stream()); - reduced_cost_.resize(other.reduced_cost_.size(), handle_ptr->get_stream()); - - // Copy the data - raft::copy(primal_solution_.data(), - other.primal_solution_.data(), - primal_solution_.size(), - handle_ptr->get_stream()); - raft::copy(dual_solution_.data(), - other.dual_solution_.data(), - dual_solution_.size(), - handle_ptr->get_stream()); - raft::copy(reduced_cost_.data(), - other.reduced_cost_.data(), - reduced_cost_.size(), - handle_ptr->get_stream()); + is_device_memory_ = other.is_device_memory_; + + if (other.is_device_memory_) { + // Copy GPU data + if (!primal_solution_) { + primal_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!dual_solution_) { + dual_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!reduced_cost_) { + reduced_cost_ = std::make_unique>(0, handle_ptr->get_stream()); + } + + // Resize to make sure they are of same size + primal_solution_->resize(other.primal_solution_->size(), handle_ptr->get_stream()); + dual_solution_->resize(other.dual_solution_->size(), handle_ptr->get_stream()); + reduced_cost_->resize(other.reduced_cost_->size(), handle_ptr->get_stream()); + + // Copy the data + raft::copy(primal_solution_->data(), + other.primal_solution_->data(), + primal_solution_->size(), + handle_ptr->get_stream()); + raft::copy(dual_solution_->data(), + 
other.dual_solution_->data(), + dual_solution_->size(), + handle_ptr->get_stream()); + raft::copy(reduced_cost_->data(), + other.reduced_cost_->data(), + reduced_cost_->size(), + handle_ptr->get_stream()); + handle_ptr->sync_stream(); + } else { + // Copy CPU data + if (!primal_solution_host_) { primal_solution_host_ = std::make_unique>(); } + if (!dual_solution_host_) { dual_solution_host_ = std::make_unique>(); } + if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique>(); } + + *primal_solution_host_ = *other.primal_solution_host_; + *dual_solution_host_ = *other.dual_solution_host_; + *reduced_cost_host_ = *other.reduced_cost_host_; + } + termination_stats_ = other.termination_stats_; termination_status_ = other.termination_status_; objective_name_ = other.objective_name_; var_names_ = other.var_names_; row_names_ = other.row_names_; // We do not copy the warm start info. As it is not needed for this purpose. - handle_ptr->sync_stream(); } template @@ -203,18 +286,31 @@ void optimization_problem_solution_t::write_to_file(std::string_view f << std::endl; return; } + std::vector primal_solution; std::vector dual_solution; std::vector reduced_cost; - primal_solution.resize(primal_solution_.size()); - dual_solution.resize(dual_solution_.size()); - reduced_cost.resize(reduced_cost_.size()); - raft::copy( - primal_solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - raft::copy( - dual_solution.data(), dual_solution_.data(), dual_solution_.size(), stream_view.value()); - raft::copy(reduced_cost.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + primal_solution.resize(primal_solution_->size()); + dual_solution.resize(dual_solution_->size()); + reduced_cost.resize(reduced_cost_->size()); + raft::copy(primal_solution.data(), + primal_solution_->data(), + primal_solution_->size(), + 
stream_view.value()); + raft::copy( + dual_solution.data(), dual_solution_->data(), dual_solution_->size(), stream_view.value()); + raft::copy( + reduced_cost.data(), reduced_cost_->data(), reduced_cost_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + primal_solution = *primal_solution_host_; + dual_solution = *dual_solution_host_; + reduced_cost = *reduced_cost_host_; + } myfile << "{ " << std::endl; myfile << "\t\"Termination reason\" : \"" << get_termination_status_string() << "\"," @@ -305,35 +401,78 @@ f_t optimization_problem_solution_t::get_dual_objective_value() const return termination_stats_.dual_objective; } +template +bool optimization_problem_solution_t::is_device_memory() const +{ + return is_device_memory_; +} + template rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() { - return primal_solution_; + return *primal_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() const { - return primal_solution_; + return *primal_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() { - return dual_solution_; + return *dual_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() const { - return dual_solution_; + return *dual_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() { - return reduced_cost_; + return *reduced_cost_; +} + +// Host (CPU) getters +template +std::vector& optimization_problem_solution_t::get_primal_solution_host() +{ + return *primal_solution_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_primal_solution_host() const +{ + return *primal_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_dual_solution_host() +{ + return *dual_solution_host_; +} + +template +const std::vector& 
optimization_problem_solution_t::get_dual_solution_host() const +{ + return *dual_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_reduced_cost_host() +{ + return *reduced_cost_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_reduced_cost_host() const +{ + return *reduced_cost_host_; } template @@ -374,10 +513,18 @@ void optimization_problem_solution_t::write_to_sol_file( auto objective_value = get_objective_value(); std::vector solution; - solution.resize(primal_solution_.size()); - raft::copy( - solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + solution.resize(primal_solution_->size()); + raft::copy( + solution.data(), primal_solution_->data(), primal_solution_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + solution = *primal_solution_host_; + } + solution_writer_t::write_solution_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 6d847298d..972f073ee 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -143,47 +143,58 @@ linear_programming_ret_t call_solve_lp( const bool use_pdlp_solver_mode = true; auto solution = cuopt::linear_programming::solve_lp( op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); - linear_programming_ret_t lp_ret{ - std::make_unique(solution.get_primal_solution().release()), - std::make_unique(solution.get_dual_solution().release()), - std::make_unique(solution.get_reduced_cost().release()), - std::make_unique( - 
solution.get_pdlp_warm_start_data().current_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_dual_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_primal_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_dual_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_ATY_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()), - solution.get_pdlp_warm_start_data().initial_primal_weight_, - solution.get_pdlp_warm_start_data().initial_step_size_, - solution.get_pdlp_warm_start_data().total_pdlp_iterations_, - solution.get_pdlp_warm_start_data().total_pdhg_iterations_, - solution.get_pdlp_warm_start_data().last_candidate_kkt_score_, - solution.get_pdlp_warm_start_data().last_restart_kkt_score_, - solution.get_pdlp_warm_start_data().sum_solution_weight_, - solution.get_pdlp_warm_start_data().iterations_since_last_restart_, - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_additional_termination_information().l2_primal_residual, - solution.get_additional_termination_information().l2_dual_residual, - solution.get_additional_termination_information().primal_objective, - solution.get_additional_termination_information().dual_objective, - solution.get_additional_termination_information().gap, - solution.get_additional_termination_information().number_of_steps_taken, - solution.get_additional_termination_information().solve_time, - 
solution.get_additional_termination_information().solved_by_pdlp}; + + linear_programming_ret_t lp_ret; + + // GPU data (local solve always uses GPU) + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + + // Warm start data + lp_ret.current_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_primal_solution_.release()); + lp_ret.current_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_dual_solution_.release()); + lp_ret.initial_primal_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_primal_average_.release()); + lp_ret.initial_dual_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_dual_average_.release()); + lp_ret.current_ATY_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_ATY_.release()); + lp_ret.sum_primal_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()); + lp_ret.sum_dual_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()); + lp_ret.last_restart_duality_gap_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()); + lp_ret.last_restart_duality_gap_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()); + lp_ret.initial_primal_weight_ = solution.get_pdlp_warm_start_data().initial_primal_weight_; + lp_ret.initial_step_size_ = solution.get_pdlp_warm_start_data().initial_step_size_; + lp_ret.total_pdlp_iterations_ = solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + lp_ret.total_pdhg_iterations_ = 
solution.get_pdlp_warm_start_data().total_pdhg_iterations_; + lp_ret.last_candidate_kkt_score_ = solution.get_pdlp_warm_start_data().last_candidate_kkt_score_; + lp_ret.last_restart_kkt_score_ = solution.get_pdlp_warm_start_data().last_restart_kkt_score_; + lp_ret.sum_solution_weight_ = solution.get_pdlp_warm_start_data().sum_solution_weight_; + lp_ret.iterations_since_last_restart_ = + solution.get_pdlp_warm_start_data().iterations_since_last_restart_; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = solution.get_additional_termination_information().l2_primal_residual; + lp_ret.l2_dual_residual_ = solution.get_additional_termination_information().l2_dual_residual; + lp_ret.primal_objective_ = solution.get_additional_termination_information().primal_objective; + lp_ret.dual_objective_ = solution.get_additional_termination_information().dual_objective; + lp_ret.gap_ = solution.get_additional_termination_information().gap; + lp_ret.nb_iterations_ = solution.get_additional_termination_information().number_of_steps_taken; + lp_ret.solve_time_ = solution.get_additional_termination_information().solve_time; + lp_ret.solved_by_pdlp_ = solution.get_additional_termination_information().solved_by_pdlp; return lp_ret; } @@ -206,20 +217,24 @@ mip_ret_t call_solve_mip( error_type_t::ValidationError, "MIP solve cannot be called on an LP problem!"); auto solution = cuopt::linear_programming::solve_mip(op_problem, solver_settings); - mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_objective_value(), - solution.get_mip_gap(), - solution.get_solution_bound(), - solution.get_total_solve_time(), - solution.get_presolve_time(), - 
solution.get_max_constraint_violation(), - solution.get_max_int_violation(), - solution.get_max_variable_bound_violation(), - solution.get_num_nodes(), - solution.get_num_simplex_iterations()}; + + mip_ret_t mip_ret; + mip_ret.solution_ = std::make_unique(solution.get_solution().release()); + mip_ret.is_device_memory_ = true; + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = solution.get_num_simplex_iterations(); + return mip_ret; } @@ -262,40 +277,56 @@ std::unique_ptr call_solve( // Convert solution to linear_programming_ret_t auto term_info = solution.get_additional_termination_information(); - linear_programming_ret_t lp_ret{ - std::make_unique(solution.get_primal_solution().release()), - std::make_unique(solution.get_dual_solution().release()), - std::make_unique(solution.get_reduced_cost().release()), - // Warm start data - create empty buffers to avoid null pointer issues in Python wrapper - std::make_unique(), // current_primal_solution - std::make_unique(), // current_dual_solution - std::make_unique(), // initial_primal_average - std::make_unique(), // initial_dual_average - std::make_unique(), // current_ATY - std::make_unique(), // sum_primal_solutions - std::make_unique(), // sum_dual_solutions - std::make_unique(), // 
last_restart_duality_gap_primal_solution - std::make_unique(), // last_restart_duality_gap_dual_solution - 0.0, // initial_primal_weight - 0.0, // initial_step_size - 0, // total_pdlp_iterations - 0, // total_pdhg_iterations - 0.0, // last_candidate_kkt_score - 0.0, // last_restart_kkt_score - 0.0, // sum_solution_weight - 0, // iterations_since_last_restart - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - term_info.l2_primal_residual, - term_info.l2_dual_residual, - term_info.primal_objective, - term_info.dual_objective, - term_info.gap, - term_info.number_of_steps_taken, - solution.get_solve_time(), - false // solved_by_pdlp - }; + linear_programming_ret_t lp_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve + lp_ret.primal_solution_host_ = std::move(solution.get_primal_solution_host()); + lp_ret.dual_solution_host_ = std::move(solution.get_dual_solution_host()); + lp_ret.reduced_cost_host_ = std::move(solution.get_reduced_cost_host()); + lp_ret.is_device_memory_ = false; + } + + // Warm start data - create empty buffers to avoid null pointer issues in Python wrapper + lp_ret.current_primal_solution_ = std::make_unique(); + lp_ret.current_dual_solution_ = std::make_unique(); + lp_ret.initial_primal_average_ = std::make_unique(); + lp_ret.initial_dual_average_ = std::make_unique(); + lp_ret.current_ATY_ = std::make_unique(); + lp_ret.sum_primal_solutions_ = std::make_unique(); + lp_ret.sum_dual_solutions_ = std::make_unique(); + lp_ret.last_restart_duality_gap_primal_solution_ = 
std::make_unique(); + lp_ret.last_restart_duality_gap_dual_solution_ = std::make_unique(); + lp_ret.initial_primal_weight_ = 0.0; + lp_ret.initial_step_size_ = 0.0; + lp_ret.total_pdlp_iterations_ = 0; + lp_ret.total_pdhg_iterations_ = 0; + lp_ret.last_candidate_kkt_score_ = 0.0; + lp_ret.last_restart_kkt_score_ = 0.0; + lp_ret.sum_solution_weight_ = 0.0; + lp_ret.iterations_since_last_restart_ = 0; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = term_info.l2_primal_residual; + lp_ret.l2_dual_residual_ = term_info.l2_dual_residual; + lp_ret.primal_objective_ = term_info.primal_objective; + lp_ret.dual_objective_ = term_info.dual_objective; + lp_ret.gap_ = term_info.gap; + lp_ret.nb_iterations_ = term_info.number_of_steps_taken; + lp_ret.solve_time_ = solution.get_solve_time(); + lp_ret.solved_by_pdlp_ = false; + response.lp_ret = std::move(lp_ret); response.problem_type = linear_programming::problem_category_t::LP; } else { @@ -303,20 +334,32 @@ std::unique_ptr call_solve( auto solution = linear_programming::solve_mip(&handle_, *data_model, solver_settings->get_mip_settings()); - mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_objective_value(), - solution.get_mip_gap(), - solution.get_solution_bound(), - solution.get_total_solve_time(), - solution.get_presolve_time(), - solution.get_max_constraint_violation(), - solution.get_max_int_violation(), - solution.get_max_variable_bound_violation(), - solution.get_num_nodes(), - solution.get_num_simplex_iterations()}; + mip_ret_t mip_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + mip_ret.solution_ = 
std::make_unique(solution.get_solution().release()); + mip_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve + mip_ret.solution_host_ = std::move(solution.get_solution_host()); + mip_ret.is_device_memory_ = false; + } + + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = solution.get_num_simplex_iterations(); + response.mip_ret = std::move(mip_ret); response.problem_type = linear_programming::problem_category_t::MIP; } diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 2ce6d5700..0fc8eff02 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -28,7 +28,8 @@ mip_solution_t::mip_solution_t(rmm::device_uvector solution, f_t max_variable_bound_violation, solver_stats_t stats, std::vector> solution_pool) - : solution_(std::move(solution)), + : solution_(std::make_unique>(std::move(solution))), + is_device_memory_(true), var_names_(std::move(var_names)), objective_(objective), mip_gap_(mip_gap), @@ -46,7 +47,8 @@ template mip_solution_t::mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats, rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), + : solution_(std::make_unique>(0, stream_view)), + is_device_memory_(true), objective_(0), mip_gap_(0), termination_status_(termination_status), @@ -61,7 +63,65 @@ mip_solution_t::mip_solution_t(mip_termination_status_t termination_st template mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view) - : solution_(0, stream_view), + : solution_(std::make_unique>(0, stream_view)), + is_device_memory_(true), + objective_(0), + mip_gap_(0), + termination_status_(mip_termination_status_t::NoTermination), + max_constraint_violation_(0), + max_int_violation_(0), + max_variable_bound_violation_(0), + error_status_(error_status) +{ +} + +// CPU-only constructor for remote solve with solution data +template +mip_solution_t::mip_solution_t(std::vector solution, + std::vector var_names, + f_t objective, + f_t mip_gap, + mip_termination_status_t termination_status, + f_t max_constraint_violation, + f_t max_int_violation, + f_t max_variable_bound_violation, + solver_stats_t stats) + : solution_host_(std::make_unique>(std::move(solution))), + is_device_memory_(false), + var_names_(std::move(var_names)), + objective_(objective), + mip_gap_(mip_gap), + termination_status_(termination_status), + max_constraint_violation_(max_constraint_violation), + max_int_violation_(max_int_violation), + 
max_variable_bound_violation_(max_variable_bound_violation), + stats_(stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +mip_solution_t::mip_solution_t(mip_termination_status_t termination_status, + solver_stats_t stats) + : solution_host_(std::make_unique>()), + is_device_memory_(false), + objective_(0), + mip_gap_(0), + termination_status_(termination_status), + max_constraint_violation_(0), + max_int_violation_(0), + max_variable_bound_violation_(0), + stats_(stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +mip_solution_t::mip_solution_t(const cuopt::logic_error& error_status) + : solution_host_(std::make_unique>()), + is_device_memory_(false), objective_(0), mip_gap_(0), termination_status_(mip_termination_status_t::NoTermination), @@ -78,16 +138,34 @@ const cuopt::logic_error& mip_solution_t::get_error_status() const return error_status_; } +template +bool mip_solution_t::is_device_memory() const +{ + return is_device_memory_; +} + template const rmm::device_uvector& mip_solution_t::get_solution() const { - return solution_; + return *solution_; } template rmm::device_uvector& mip_solution_t::get_solution() { - return solution_; + return *solution_; +} + +template +std::vector& mip_solution_t::get_solution_host() +{ + return *solution_host_; +} + +template +const std::vector& mip_solution_t::get_solution_host() const +{ + return *solution_host_; } template @@ -211,9 +289,16 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, double objective_value = get_objective_value(); auto& var_names = get_variable_names(); std::vector solution; - solution.resize(solution_.size()); - raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + 
// Copy from GPU to CPU + solution.resize(solution_->size()); + raft::copy(solution.data(), solution_->data(), solution_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + solution = *solution_host_; + } solution_writer_t::write_solution_to_sol_file( std::string(filename), status, objective_value, var_names, solution); diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index c140e3d0c..9e211e212 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 @@ -120,9 +120,16 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_solution.hpp" namespace " cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace "cuopt::cython": # noqa cdef cppclass linear_programming_ret_t: + # GPU (device) storage unique_ptr[device_buffer] primal_solution_ unique_ptr[device_buffer] dual_solution_ unique_ptr[device_buffer] reduced_cost_ + # CPU (host) storage for remote solve + vector[double] primal_solution_host_ + vector[double] dual_solution_host_ + vector[double] reduced_cost_host_ + # Flag indicating where solution data is stored + bool is_device_memory_ # PDLP warm start data unique_ptr[device_buffer] current_primal_solution_ unique_ptr[device_buffer] current_dual_solution_ @@ -155,7 +162,12 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace bool solved_by_pdlp_ cdef cppclass mip_ret_t: + # GPU (device) storage unique_ptr[device_buffer] solution_ + # CPU (host) storage for remote solve + vector[double] solution_host_ + # Flag 
indicating where solution data is stored + bool is_device_memory_ mip_termination_status_t termination_status_ error_type_t error_status_ string error_message_ diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 1991af0d6..2b77c7150 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 @@ -300,9 +300,17 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, sol_ret = move(sol_ret_ptr.get()[0]) if sol_ret.problem_type == ProblemCategory.MIP or sol_ret.problem_type == ProblemCategory.IP: # noqa - solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.mip_ret.solution_) - ) + # Check if data is on GPU or CPU + if sol_ret.mip_ret.is_device_memory_: + # GPU data - use DeviceBuffer + solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.mip_ret.solution_) + ) + solution = series_from_buf(solution, pa.float64()).to_numpy() + else: + # CPU data - convert vector directly to numpy + solution = np.array(sol_ret.mip_ret.solution_host_, dtype=np.float64) + termination_status = sol_ret.mip_ret.termination_status_ error_status = sol_ret.mip_ret.error_status_ error_message = sol_ret.mip_ret.error_message_ @@ -317,8 +325,6 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, num_nodes = sol_ret.mip_ret.nodes_ num_simplex_iterations = sol_ret.mip_ret.simplex_iterations_ - solution = series_from_buf(solution, pa.float64()).to_numpy() - return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), solution)), @@ -339,15 +345,23 @@ cdef 
create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, ) else: - primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.primal_solution_) - ) - dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa - reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa + # Check if data is on GPU or CPU + if sol_ret.lp_ret.is_device_memory_: + # GPU data - use DeviceBuffer + primal_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.primal_solution_) + ) + dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa + reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa - primal_solution = series_from_buf(primal_solution, pa.float64()).to_numpy() - dual_solution = series_from_buf(dual_solution, pa.float64()).to_numpy() - reduced_cost = series_from_buf(reduced_cost, pa.float64()).to_numpy() + primal_solution = series_from_buf(primal_solution, pa.float64()).to_numpy() + dual_solution = series_from_buf(dual_solution, pa.float64()).to_numpy() + reduced_cost = series_from_buf(reduced_cost, pa.float64()).to_numpy() + else: + # CPU data - convert vectors directly to numpy + primal_solution = np.array(sol_ret.lp_ret.primal_solution_host_, dtype=np.float64) + dual_solution = np.array(sol_ret.lp_ret.dual_solution_host_, dtype=np.float64) + reduced_cost = np.array(sol_ret.lp_ret.reduced_cost_host_, dtype=np.float64) termination_status = sol_ret.lp_ret.termination_status_ error_status = sol_ret.lp_ret.error_status_ From b619ecbc3288db0209649b6c36ef0db8293eeaef Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Fri, 2 Jan 2026 16:44:31 -0500 Subject: [PATCH 06/37] Implement lazy CUDA initialization for GPU-free remote solve - Check is_remote_solve_enabled() before any CUDA initialization in CLI - Skip raft::handle creation and memory resource setup for remote solve - Use CPU-only solution constructors when remote solve returns 
error - Handle nullptr handle_ptr gracefully in solve_lp/solve_mip - Enables CLI to run on hosts without GPU when remote solve is configured --- cpp/cuopt_cli.cpp | 46 +++++++++++++++++++---------- cpp/src/linear_programming/solve.cu | 20 +++++++++---- cpp/src/mip/solve.cu | 17 +++++++---- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index a3bc29eac..b6af636b7 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,12 +9,13 @@ #include #include #include +#include #include #include +// CUDA headers - only included for local solve path #include #include - #include #include @@ -186,13 +187,18 @@ inline cuopt::init_logger_t dummy_logger( * @param file_path Path to the MPS format input file containing the optimization problem * @param initial_solution_file Path to initial solution file in SOL format * @param settings_strings Map of solver parameters + * @param is_remote_solve Whether remote solve is enabled (skips CUDA handle creation) */ int run_single_file(const std::string& file_path, const std::string& initial_solution_file, bool solve_relaxation, - const std::map& settings_strings) + const std::map& settings_strings, + bool is_remote_solve) { - const raft::handle_t handle_{}; + // Only create raft handle for local solve - it triggers CUDA initialization + std::unique_ptr handle_ptr; + if (!is_remote_solve) { handle_ptr = std::make_unique(); } + cuopt::linear_programming::solver_settings_t settings; try { @@ -264,16 +270,17 @@ int run_single_file(const std::string& file_path, auto view = create_view_from_mps_data_model(mps_data_model); try { + // Pass handle_ptr.get() - can be nullptr for remote 
solve if (is_mip) { auto& mip_settings = settings.get_mip_settings(); - auto solution = cuopt::linear_programming::solve_mip(&handle_, view, mip_settings); + auto solution = cuopt::linear_programming::solve_mip(handle_ptr.get(), view, mip_settings); if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { CUOPT_LOG_ERROR("MIP solve failed: %s", solution.get_error_status().what()); return -1; } } else { auto& lp_settings = settings.get_pdlp_settings(); - auto solution = cuopt::linear_programming::solve_lp(&handle_, view, lp_settings); + auto solution = cuopt::linear_programming::solve_lp(handle_ptr.get(), view, lp_settings); if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what()); return -1; @@ -451,19 +458,26 @@ int main(int argc, char* argv[]) const auto initial_solution_file = program.get("--initial-solution"); const auto solve_relaxation = program.get("--relaxation"); - // All arguments are parsed as string, default values are parsed as int if unused. - const auto num_gpus = program.is_used("--num-gpus") - ? std::stoi(program.get("--num-gpus")) - : program.get("--num-gpus"); + // Check for remote solve BEFORE any CUDA initialization + const bool is_remote_solve = cuopt::linear_programming::is_remote_solve_enabled(); std::vector> memory_resources; - for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { - cudaSetDevice(i); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + if (!is_remote_solve) { + // Only initialize CUDA resources for local solve + // All arguments are parsed as string, default values are parsed as int if unused. + const auto num_gpus = program.is_used("--num-gpus") + ? 
std::stoi(program.get("--num-gpus")) + : program.get("--num-gpus"); + + for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { + cudaSetDevice(i); + memory_resources.push_back(make_async()); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + } + cudaSetDevice(0); } - cudaSetDevice(0); - return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings); + return run_single_file( + file_name, initial_solution_file, solve_relaxation, settings_strings, is_remote_solve); } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 4e5c9c6a0..73407ee1f 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1404,7 +1404,13 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ if (view.is_device_memory()) { if (remote_config.has_value()) { - // GPU data + remote solve requested: copy to CPU and proceed with remote + // GPU data + remote solve requested: need valid handle to copy GPU→CPU + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR( + "[solve_lp] Remote solve requested with GPU data but no CUDA handle. " + "This is an internal error - GPU data should not exist without CUDA initialization."); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError); + } CUOPT_LOG_WARN( "[solve_lp] Remote solve requested but data is on GPU. 
" "Copying to CPU for serialization (performance impact)."); @@ -1416,8 +1422,9 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ remote_config->port); // TODO: Implement remote solve - serialize cpu_view and send to remote server CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); - return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, - handle_ptr->get_stream()); + // Use CPU-only error constructor + return optimization_problem_solution_t( + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); } // Local solve: data already on GPU - convert view to optimization_problem_t and solve @@ -1432,8 +1439,9 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ remote_config->port); // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); - return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError, - handle_ptr->get_stream()); + // Use CPU-only error constructor + return optimization_problem_solution_t( + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); } // Local solve with CPU data: copy to GPU and solve diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index 041cd0620..d5d9238c1 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -520,7 +520,14 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, if (view.is_device_memory()) { if (remote_config.has_value()) { - // GPU data + remote solve requested: copy to CPU and proceed with remote + // GPU data + remote solve requested: need valid handle to copy GPU→CPU + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR( + "[solve_mip] Remote solve requested with GPU data but no CUDA handle. 
" + "This is an internal error - GPU data should not exist without CUDA initialization."); + return mip_solution_t( + cuopt::logic_error("No CUDA handle for GPU data", cuopt::error_type_t::RuntimeError)); + } CUOPT_LOG_WARN( "[solve_mip] Remote solve requested but data is on GPU. " "Copying to CPU for serialization (performance impact)."); @@ -533,9 +540,9 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, remote_config->port); // TODO: Implement remote solve - serialize cpu_view and send to remote server CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); + // Use CPU-only constructor - no stream needed for remote solve error return mip_solution_t( - cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError), - handle_ptr->get_stream()); + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); } // Local solve: data already on GPU - convert view to optimization_problem_t and solve @@ -550,9 +557,9 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, remote_config->port); // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); + // Use CPU-only constructor - no stream/handle needed for remote solve return mip_solution_t( - cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError), - handle_ptr->get_stream()); + cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); } // Local solve with CPU data: copy to GPU and solve From 79c5153e83b36263df65b3ec93d79f3977de98a1 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Fri, 2 Jan 2026 17:05:02 -0500 Subject: [PATCH 07/37] Implement lazy CUDA initialization for C API - Make raft::handle_t in problem_and_stream_view_t lazy (unique_ptr) - Make stream_view in solution_and_stream_view_t optional - Check is_remote_solve_enabled() before creating handle in cuOptSolve 
- Update cuOptGetPrimalSolution/DualSolution/ReducedCosts to handle CPU data - Enables C API to run on hosts without GPU when remote solve is configured --- cpp/src/linear_programming/cuopt_c.cpp | 136 +++++++++++++++---------- 1 file changed, 84 insertions(+), 52 deletions(-) diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp index b2dc0101b..dcf354914 100644 --- a/cpp/src/linear_programming/cuopt_c.cpp +++ b/cpp/src/linear_programming/cuopt_c.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -146,15 +147,22 @@ struct problem_cpu_data_t { }; struct problem_and_stream_view_t { - problem_and_stream_view_t() - : cpu_data(nullptr), - gpu_problem(nullptr), - stream_view(rmm::cuda_stream_per_thread), - handle(stream_view) + problem_and_stream_view_t() : cpu_data(nullptr), gpu_problem(nullptr), handle(nullptr) {} + + /** + * @brief Ensure CUDA resources are initialized (lazy initialization). + * Only call this when local solve is needed. + */ + void ensure_cuda_initialized() { + if (!handle) { handle = std::make_unique(); } } - raft::handle_t* get_handle_ptr() { return &handle; } + raft::handle_t* get_handle_ptr() + { + ensure_cuda_initialized(); + return handle.get(); + } /** * @brief Check if this is a MIP problem. 
@@ -182,8 +190,8 @@ struct problem_and_stream_view_t { // Use view.is_device_memory() to check if data is on GPU or CPU cuopt::linear_programming::data_model_view_t view; - rmm::cuda_stream_view stream_view; - raft::handle_t handle; + // Lazy-initialized CUDA handle (only created for local solve) + std::unique_ptr handle; /** * @brief Create a view pointing to GPU data from the gpu_problem. @@ -246,17 +254,16 @@ struct problem_and_stream_view_t { }; struct solution_and_stream_view_t { - solution_and_stream_view_t(bool solution_for_mip, rmm::cuda_stream_view stream_view) - : is_mip(solution_for_mip), - mip_solution_ptr(nullptr), - lp_solution_ptr(nullptr), - stream_view(stream_view) + solution_and_stream_view_t(bool solution_for_mip, raft::handle_t* handle_ptr = nullptr) + : is_mip(solution_for_mip), mip_solution_ptr(nullptr), lp_solution_ptr(nullptr) { + // Store stream only if we have a handle (local solve) + if (handle_ptr) { stream_view = handle_ptr->get_stream(); } } bool is_mip; mip_solution_t* mip_solution_ptr; optimization_problem_solution_t* lp_solution_ptr; - rmm::cuda_stream_view stream_view; + std::optional stream_view; // Only present for local solve }; int8_t cuOptGetFloatSize() { return sizeof(cuopt_float_t); } @@ -1275,7 +1282,7 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solver_settings->get_mip_settings(); solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); + new solution_and_stream_view_t(true, problem_and_stream_view->handle.get()); solution_and_stream_view->mip_solution_ptr = new mip_solution_t( solve_mip(gpu_problem, mip_settings)); @@ -1291,7 +1298,7 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solver_settings->get_pdlp_settings(); solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); + new solution_and_stream_view_t(false, 
problem_and_stream_view->handle.get()); solution_and_stream_view->lp_solution_ptr = new optimization_problem_solution_t( @@ -1306,16 +1313,20 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, } } else { // CPU path: use view directly - solve_lp/solve_mip handle remote vs local conversion + // For remote solve, handle may be nullptr (no CUDA) + // For local solve with CPU data, handle will be created lazily + raft::handle_t* handle_ptr = + is_remote_solve_enabled() ? nullptr : problem_and_stream_view->get_handle_ptr(); + if (is_mip) { mip_solver_settings_t& mip_settings = solver_settings->get_mip_settings(); solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); + new solution_and_stream_view_t(true, handle_ptr); - solution_and_stream_view->mip_solution_ptr = - new mip_solution_t(solve_mip( - problem_and_stream_view->get_handle_ptr(), view, mip_settings)); + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(handle_ptr, view, mip_settings)); *solution_ptr = static_cast(solution_and_stream_view); @@ -1328,12 +1339,11 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, solver_settings->get_pdlp_settings(); solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); + new solution_and_stream_view_t(false, handle_ptr); solution_and_stream_view->lp_solution_ptr = new optimization_problem_solution_t( - solve_lp( - problem_and_stream_view->get_handle_ptr(), view, pdlp_settings)); + solve_lp(handle_ptr, view, pdlp_settings)); *solution_ptr = static_cast(solution_and_stream_view); @@ -1432,24 +1442,34 @@ cuopt_int_t cuOptGetPrimalSolution(cuOptSolution solution, cuopt_float_t* soluti mip_solution_t* mip_solution = static_cast*>( solution_and_stream_view->mip_solution_ptr); - const rmm::device_uvector& solution_values = mip_solution->get_solution(); - rmm::cuda_stream_view 
stream_view{}; - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (mip_solution->is_device_memory()) { + const rmm::device_uvector& solution_values = mip_solution->get_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& solution_values = mip_solution->get_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } else { optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& solution_values = - optimization_problem_solution->get_primal_solution(); - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& solution_values = + optimization_problem_solution->get_primal_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& solution_values = + optimization_problem_solution->get_primal_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } return CUOPT_SUCCESS; } @@ -1540,13 +1560,19 @@ cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_sol optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& dual_solution = - 
optimization_problem_solution->get_dual_solution(); - raft::copy(dual_solution_ptr, - dual_solution.data(), - dual_solution.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& dual_solution = + optimization_problem_solution->get_dual_solution(); + raft::copy(dual_solution_ptr, + dual_solution.data(), + dual_solution.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& dual_solution = + optimization_problem_solution->get_dual_solution_host(); + std::copy(dual_solution.begin(), dual_solution.end(), dual_solution_ptr); + } return CUOPT_SUCCESS; } } @@ -1581,13 +1607,19 @@ cuopt_int_t cuOptGetReducedCosts(cuOptSolution solution, cuopt_float_t* reduced_ optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& reduced_cost = - optimization_problem_solution->get_reduced_cost(); - raft::copy(reduced_cost_ptr, - reduced_cost.data(), - reduced_cost.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& reduced_cost = + optimization_problem_solution->get_reduced_cost(); + raft::copy(reduced_cost_ptr, + reduced_cost.data(), + reduced_cost.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& reduced_cost = + optimization_problem_solution->get_reduced_cost_host(); + std::copy(reduced_cost.begin(), reduced_cost.end(), reduced_cost_ptr); + } return CUOPT_SUCCESS; } } From 955e62c3ea8fb8d47e5f586c7d1ea39ab622aa63 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 5 Jan 2026 11:18:57 -0500 Subject: [PATCH 08/37] Implement lazy 
CUDA initialization for Python API - cython_solve.cu: Check remote solve before CUDA stream/handle creation - Remote solve path passes nullptr handle to solve_lp/solve_mip - Local solve path creates CUDA resources only when needed - cuopt/__init__.py: Make linear_programming and routing imports lazy - Uses __getattr__ for deferred module loading - Allows 'import cuopt' on CPU-only hosts with remote solve - solver_wrapper.pyx: Lazy CUDA imports and remote solve detection - Remove eager imports of cupy, cudf, numba.cuda - Add is_remote_solve_enabled() Python function - Add _get_cuda_imports() for lazy module loading - Update create_solution() to check is_device_memory_ for warm start - utilities/__init__.py: Lazy imports for CUDA-dependent utilities - type_cast, series_from_buf, check_solution via __getattr__ - utilities/utils.py: Lazy cudf/pylibcudf imports in series_from_buf Enables Python programs to run on CPU-only hosts when remote solve is configured via CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT. 
--- .../utilities/cython_solve.cu | 34 +-- python/cuopt/cuopt/__init__.py | 22 +- .../solver/solver_wrapper.pyx | 238 +++++++++++------- python/cuopt/cuopt/utilities/__init__.py | 21 +- python/cuopt/cuopt/utilities/utils.py | 12 +- 5 files changed, 214 insertions(+), 113 deletions(-) diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 972f073ee..f0dd24706 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -244,16 +244,7 @@ std::unique_ptr call_solve( unsigned int flags, bool is_batch_mode) { - raft::common::nvtx::range fun_scope("Call Solve"); - - // FIX: Use default handle constructor like CLI does, instead of explicit stream creation - // Original code created a non-blocking stream which causes synchronization issues with PDLP - // This is a workaround to fix the synchronization issues, please fix this in the future and - // remove this workaround. 
cudaStream_t stream; RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, - // flags)); // flags=cudaStreamNonBlocking const raft::handle_t handle_{stream}; - const raft::handle_t handle_{}; - - // Check if remote solve is configured + // Check if remote solve is configured FIRST (before any CUDA operations) if (linear_programming::is_remote_solve_enabled()) { // Data coming from Python is in CPU memory - mark it as such data_model->set_is_device_memory(false); @@ -271,9 +262,9 @@ std::unique_ptr call_solve( } if (!is_mip) { - // LP: call solve_lp with view - it will handle remote + // LP: call solve_lp with nullptr handle - remote solve doesn't need GPU auto solution = - linear_programming::solve_lp(&handle_, *data_model, solver_settings->get_pdlp_settings()); + linear_programming::solve_lp(nullptr, *data_model, solver_settings->get_pdlp_settings()); // Convert solution to linear_programming_ret_t auto term_info = solution.get_additional_termination_information(); @@ -289,7 +280,10 @@ std::unique_ptr call_solve( std::make_unique(solution.get_reduced_cost().release()); lp_ret.is_device_memory_ = true; } else { - // CPU data from remote solve + // CPU data from remote solve - initialize empty device buffers for Python compatibility + lp_ret.primal_solution_ = std::make_unique(); + lp_ret.dual_solution_ = std::make_unique(); + lp_ret.reduced_cost_ = std::make_unique(); lp_ret.primal_solution_host_ = std::move(solution.get_primal_solution_host()); lp_ret.dual_solution_host_ = std::move(solution.get_dual_solution_host()); lp_ret.reduced_cost_host_ = std::move(solution.get_reduced_cost_host()); @@ -330,9 +324,9 @@ std::unique_ptr call_solve( response.lp_ret = std::move(lp_ret); response.problem_type = linear_programming::problem_category_t::LP; } else { - // MIP: call solve_mip with view - it will handle remote + // MIP: call solve_mip with nullptr handle - remote solve doesn't need GPU auto solution = - linear_programming::solve_mip(&handle_, *data_model, 
solver_settings->get_mip_settings()); + linear_programming::solve_mip(nullptr, *data_model, solver_settings->get_mip_settings()); mip_ret_t mip_ret; @@ -341,7 +335,8 @@ std::unique_ptr call_solve( mip_ret.solution_ = std::make_unique(solution.get_solution().release()); mip_ret.is_device_memory_ = true; } else { - // CPU data from remote solve + // CPU data from remote solve - initialize empty device buffer for Python compatibility + mip_ret.solution_ = std::make_unique(); mip_ret.solution_host_ = std::move(solution.get_solution_host()); mip_ret.is_device_memory_ = false; } @@ -367,6 +362,13 @@ std::unique_ptr call_solve( return std::make_unique(std::move(response)); } + // Local solve: create CUDA resources only when needed + raft::common::nvtx::range fun_scope("Call Solve"); + + // FIX: Use default handle constructor like CLI does, instead of explicit stream creation + // Original code created a non-blocking stream which causes synchronization issues with PDLP + const raft::handle_t handle_{}; + // Local solve: proceed as before - create GPU problem and solve auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); solver_ret_t response; diff --git a/python/cuopt/cuopt/__init__.py b/python/cuopt/cuopt/__init__.py index c6e9150c8..7ea141221 100644 --- a/python/cuopt/cuopt/__init__.py +++ b/python/cuopt/cuopt/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 try: @@ -9,5 +9,23 @@ libcuopt.load_library() del libcuopt -from cuopt import linear_programming, routing from cuopt._version import __git_commit__, __version__, __version_major_minor__ + +# Lazy imports for linear_programming and routing modules +# This allows cuopt to be imported on CPU-only hosts when remote solve is configured +_submodules = ["linear_programming", "routing"] + + +def __getattr__(name): + """Lazy import submodules to support CPU-only hosts with remote solve.""" + if name in _submodules: + import importlib + return importlib.import_module(f"cuopt.{name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return __all__ + _submodules + + +__all__ = ["__git_commit__", "__version__", "__version_major_minor__"] diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 2b77c7150..adcaf9dfc 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -13,8 +13,6 @@ from datetime import date, datetime from dateutil.relativedelta import relativedelta -from cuopt.utilities import type_cast - from libc.stdint cimport uintptr_t from libc.stdlib cimport free, malloc from libc.string cimport memcpy, strcpy, strlen @@ -42,25 +40,56 @@ from cuopt.linear_programming.solver.solver cimport ( ) import math +import os import sys import warnings from enum import IntEnum -import cupy as cp import numpy as np -from numba import cuda - -import cudf from cuopt.linear_programming.solver_settings.solver_settings import ( PDLPSolverMode, SolverSettings, ) -from cuopt.utilities import InputValidationError, series_from_buf +from cuopt.utilities import InputValidationError import pyarrow as pa +def is_remote_solve_enabled(): + """Check if remote solve is enabled via environment variables. 
+ + Remote solve is enabled when both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT + environment variables are set and valid. + + Returns + ------- + bool + True if remote solve is enabled, False otherwise. + """ + host = os.environ.get("CUOPT_REMOTE_HOST", "") + port = os.environ.get("CUOPT_REMOTE_PORT", "") + + if host and port: + try: + int(port) # Validate port is a valid integer + return True + except ValueError: + return False + return False + + +def _get_cuda_imports(): + """Lazily import CUDA-dependent modules. + + Only call this when GPU operations are actually needed. + """ + import cupy as cp + import cudf + from cuopt.utilities import series_from_buf + return cp, cudf, series_from_buf + + cdef extern from "cuopt/linear_programming/utilities/internals.hpp" namespace "cuopt::internals": # noqa cdef cppclass base_solution_callback_t @@ -108,39 +137,52 @@ cdef char* c_get_string(string in_str): def get_data_ptr(array): - if isinstance(array, cudf.Series): - return array.__cuda_array_interface__['data'][0] - elif isinstance(array, np.ndarray): + """Get the data pointer from an array. + + Works with both numpy arrays (CPU) and cudf Series (GPU). + """ + if isinstance(array, np.ndarray): return array.__array_interface__['data'][0] + elif hasattr(array, '__cuda_array_interface__'): + # cudf.Series or other CUDA array + return array.__cuda_array_interface__['data'][0] else: raise Exception( "get_data_ptr must be called with cudf.Series or np.ndarray" ) -def type_cast(cudf_obj, np_type, name): - if isinstance(cudf_obj, cudf.Series): - cudf_type = cudf_obj.dtype - elif isinstance(cudf_obj, np.ndarray): - cudf_type = cudf_obj.dtype - elif isinstance(cudf_obj, cudf.DataFrame): - if all([np.issubdtype(dtype, np.number) for dtype in cudf_obj.dtypes]): # noqa - cudf_type = cudf_obj.dtypes[0] +def type_cast(obj, np_type, name): + """Cast array to the specified numpy type. + + Works with both numpy arrays and cudf objects. 
+ """ + if isinstance(obj, np.ndarray): + obj_type = obj.dtype + elif hasattr(obj, 'dtype'): + obj_type = obj.dtype + elif hasattr(obj, 'dtypes'): + # DataFrame-like object + if all([np.issubdtype(dtype, np.number) for dtype in obj.dtypes]): # noqa + obj_type = obj.dtypes[0] else: msg = "All columns in " + name + " should be numeric" raise Exception(msg) + else: + obj_type = type(obj) + if ((np.issubdtype(np_type, np.floating) and - (not np.issubdtype(cudf_type, np.floating))) + (not np.issubdtype(obj_type, np.floating))) or (np.issubdtype(np_type, np.integer) and - (not np.issubdtype(cudf_type, np.integer))) + (not np.issubdtype(obj_type, np.integer))) or (np.issubdtype(np_type, np.bool_) and - (not np.issubdtype(cudf_type, np.bool_))) + (not np.issubdtype(obj_type, np.bool_))) or (np.issubdtype(np_type, np.int8) and - (not np.issubdtype(cudf_type, np.int8)))): - msg = "Casting " + name + " from " + str(cudf_type) + " to " + str(np.dtype(np_type)) # noqa + (not np.issubdtype(obj_type, np.int8)))): + msg = "Casting " + name + " from " + str(obj_type) + " to " + str(np.dtype(np_type)) # noqa warnings.warn(msg) - cudf_obj = cudf_obj.astype(np.dtype(np_type)) - return cudf_obj + obj = obj.astype(np.dtype(np_type)) + return obj cdef set_solver_setting( @@ -291,6 +333,12 @@ cdef set_solver_setting( settings.get_pdlp_warm_start_data().iterations_since_last_restart # noqa ) +cdef _convert_device_buffer_to_numpy(device_buffer, dtype): + """Convert a DeviceBuffer to numpy array using lazy CUDA imports.""" + _, _, series_from_buf = _get_cuda_imports() + return series_from_buf(device_buffer, dtype).to_numpy() + + cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, DataModel data_model_obj, is_batch=False): @@ -302,11 +350,11 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, if sol_ret.problem_type == ProblemCategory.MIP or sol_ret.problem_type == ProblemCategory.IP: # noqa # Check if data is on GPU or CPU if sol_ret.mip_ret.is_device_memory_: - # GPU data - 
use DeviceBuffer + # GPU data - use DeviceBuffer with lazy imports solution = DeviceBuffer.c_from_unique_ptr( move(sol_ret.mip_ret.solution_) ) - solution = series_from_buf(solution, pa.float64()).to_numpy() + solution = _convert_device_buffer_to_numpy(solution, pa.float64()) else: # CPU data - convert vector directly to numpy solution = np.array(sol_ret.mip_ret.solution_host_, dtype=np.float64) @@ -347,16 +395,16 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, else: # Check if data is on GPU or CPU if sol_ret.lp_ret.is_device_memory_: - # GPU data - use DeviceBuffer + # GPU data - use DeviceBuffer with lazy imports primal_solution = DeviceBuffer.c_from_unique_ptr( move(sol_ret.lp_ret.primal_solution_) ) dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa - primal_solution = series_from_buf(primal_solution, pa.float64()).to_numpy() - dual_solution = series_from_buf(dual_solution, pa.float64()).to_numpy() - reduced_cost = series_from_buf(reduced_cost, pa.float64()).to_numpy() + primal_solution = _convert_device_buffer_to_numpy(primal_solution, pa.float64()) + dual_solution = _convert_device_buffer_to_numpy(dual_solution, pa.float64()) + reduced_cost = _convert_device_buffer_to_numpy(reduced_cost, pa.float64()) else: # CPU data - convert vectors directly to numpy primal_solution = np.array(sol_ret.lp_ret.primal_solution_host_, dtype=np.float64) @@ -377,33 +425,77 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, # In BatchSolve, we don't get the warm start data if not is_batch: - current_primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_primal_solution_) - ) - current_dual_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_dual_solution_) - ) - initial_primal_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_primal_average_) - ) - 
initial_dual_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_dual_average_) - ) - current_ATY = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_ATY_) - ) - sum_primal_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_primal_solutions_) - ) - sum_dual_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_dual_solutions_) - ) - last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_) - ) - last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_) - ) + # Warm start data is only available for GPU solves + if sol_ret.lp_ret.is_device_memory_: + current_primal_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_primal_solution_) + ) + current_dual_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_dual_solution_) + ) + initial_primal_average = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.initial_primal_average_) + ) + initial_dual_average = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.initial_dual_average_) + ) + current_ATY = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_ATY_) + ) + sum_primal_solutions = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.sum_primal_solutions_) + ) + sum_dual_solutions = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.sum_dual_solutions_) + ) + last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa + move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_) + ) + last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa + move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_) + ) + + current_primal_solution = _convert_device_buffer_to_numpy( + current_primal_solution, pa.float64() + ) + current_dual_solution = _convert_device_buffer_to_numpy( + 
current_dual_solution, pa.float64() + ) + initial_primal_average = _convert_device_buffer_to_numpy( + initial_primal_average, pa.float64() + ) + initial_dual_average = _convert_device_buffer_to_numpy( + initial_dual_average, pa.float64() + ) + current_ATY = _convert_device_buffer_to_numpy( + current_ATY, pa.float64() + ) + sum_primal_solutions = _convert_device_buffer_to_numpy( + sum_primal_solutions, pa.float64() + ) + sum_dual_solutions = _convert_device_buffer_to_numpy( + sum_dual_solutions, pa.float64() + ) + last_restart_duality_gap_primal_solution = _convert_device_buffer_to_numpy( + last_restart_duality_gap_primal_solution, + pa.float64() + ) + last_restart_duality_gap_dual_solution = _convert_device_buffer_to_numpy( + last_restart_duality_gap_dual_solution, + pa.float64() + ) + else: + # CPU/remote solve - no warm start data available + current_primal_solution = np.array([], dtype=np.float64) + current_dual_solution = np.array([], dtype=np.float64) + initial_primal_average = np.array([], dtype=np.float64) + initial_dual_average = np.array([], dtype=np.float64) + current_ATY = np.array([], dtype=np.float64) + sum_primal_solutions = np.array([], dtype=np.float64) + sum_dual_solutions = np.array([], dtype=np.float64) + last_restart_duality_gap_primal_solution = np.array([], dtype=np.float64) + last_restart_duality_gap_dual_solution = np.array([], dtype=np.float64) + initial_primal_weight = sol_ret.lp_ret.initial_primal_weight_ initial_step_size = sol_ret.lp_ret.initial_step_size_ total_pdlp_iterations = sol_ret.lp_ret.total_pdlp_iterations_ @@ -413,36 +505,6 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, sum_solution_weight = sol_ret.lp_ret.sum_solution_weight_ iterations_since_last_restart = sol_ret.lp_ret.iterations_since_last_restart_ # noqa - current_primal_solution = series_from_buf( - current_primal_solution, pa.float64() - ).to_numpy() - current_dual_solution = series_from_buf( - current_dual_solution, pa.float64() - ).to_numpy() - 
initial_primal_average = series_from_buf( - initial_primal_average, pa.float64() - ).to_numpy() - initial_dual_average = series_from_buf( - initial_dual_average, pa.float64() - ).to_numpy() - current_ATY = series_from_buf( - current_ATY, pa.float64() - ).to_numpy() - sum_primal_solutions = series_from_buf( - sum_primal_solutions, pa.float64() - ).to_numpy() - sum_dual_solutions = series_from_buf( - sum_dual_solutions, pa.float64() - ).to_numpy() - last_restart_duality_gap_primal_solution = series_from_buf( - last_restart_duality_gap_primal_solution, - pa.float64() - ).to_numpy() - last_restart_duality_gap_dual_solution = series_from_buf( - last_restart_duality_gap_dual_solution, - pa.float64() - ).to_numpy() - return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), primal_solution)), # noqa diff --git a/python/cuopt/cuopt/utilities/__init__.py b/python/cuopt/cuopt/utilities/__init__.py index 8706b9451..445fe5101 100644 --- a/python/cuopt/cuopt/utilities/__init__.py +++ b/python/cuopt/cuopt/utilities/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 from cuopt.utilities.exception_handler import ( @@ -7,5 +7,20 @@ OutOfMemoryError, catch_cuopt_exception, ) -from cuopt.utilities.type_casting import type_cast -from cuopt.utilities.utils import check_solution, series_from_buf + +# Lazy imports for CUDA-dependent modules to support CPU-only hosts +# These will be imported when first accessed + + +def __getattr__(name): + """Lazy import CUDA-dependent utilities.""" + if name == "type_cast": + from cuopt.utilities.type_casting import type_cast + return type_cast + elif name == "series_from_buf": + from cuopt.utilities.utils import series_from_buf + return series_from_buf + elif name == "check_solution": + from cuopt.utilities.utils import check_solution + return check_solution + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/python/cuopt/cuopt/utilities/utils.py b/python/cuopt/cuopt/utilities/utils.py index b92968d0e..5b8d46b69 100644 --- a/python/cuopt/cuopt/utilities/utils.py +++ b/python/cuopt/cuopt/utilities/utils.py @@ -1,11 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import numpy as np -import cudf -import pylibcudf as plc - from cuopt.linear_programming.solver.solver_parameters import ( CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, CUOPT_MIP_INTEGRALITY_TOLERANCE, @@ -16,6 +13,9 @@ def series_from_buf(buf, dtype): """Helper function to create a cudf series from a buffer. + This function lazily imports cudf and pylibcudf to support + CPU-only execution when remote solve is enabled. 
+ Parameters ---------- buf : cudf.core.buffer.Buffer @@ -28,6 +28,10 @@ def series_from_buf(buf, dtype): cudf.Series A cudf Series built from the buffer """ + # Lazy imports to support CPU-only hosts with remote solve + import cudf + import pylibcudf as plc + col = plc.column.Column.from_rmm_buffer( buf, dtype=plc.types.DataType.from_arrow(dtype), From d167ebb5a99cc84a0b88af853554c273ed1d4e47 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 6 Jan 2026 11:09:11 -0500 Subject: [PATCH 09/37] Add remote solve server with real-time log streaming Features: - Remote solve server (cuopt_remote_server) with protobuf serialization - Real-time log streaming from server to client via stdout capture - Transparent remote solve via CUOPT_REMOTE_HOST/PORT environment variables - Pluggable serialization interface (remote_serialization.hpp) - Static linking of libmps_parser into libcuopt for simpler deployment Components: - cuopt_remote_server.cpp: Server with streaming protocol - cuopt_remote.proto: Protocol Buffers schema for LP/MIP problems - protobuf_serializer.cu: Serialization implementation - remote_solve.cu: Client-side remote solve with streaming support Build: - Added BUILD_REMOTE_SERVER cmake option - Added cuopt_remote_server target to build.sh - Added mps_parser_static library for static linking The server captures stdout during solve and streams it to clients while also echoing to the server console (for docker logs, monitoring, etc.). 
--- build.sh | 27 +- cpp/CMakeLists.txt | 102 +- cpp/cuopt_cli.cpp | 2 + cpp/cuopt_remote_server.cpp | 640 ++++++++++++ .../mip/solver_solution.hpp | 75 ++ .../pdlp/solver_solution.hpp | 125 +++ .../utilities/remote_serialization.hpp | 288 ++++++ .../utilities/remote_solve.hpp | 40 +- cpp/libmps_parser/CMakeLists.txt | 32 +- cpp/libmps_parser/src/data_model_view.cpp | 7 +- cpp/libmps_parser/src/mps_data_model.cpp | 6 +- cpp/scripts/run_dev_server.sh | 24 + cpp/src/linear_programming/CMakeLists.txt | 4 +- cpp/src/linear_programming/solve.cu | 15 +- cpp/src/linear_programming/solver_solution.cu | 130 +++ .../utilities/cuopt_remote.proto | 289 ++++++ .../utilities/protobuf_serializer.cu | 924 ++++++++++++++++++ .../utilities/remote_solve.cu | 360 +++++++ cpp/src/mip/solver_solution.cu | 89 ++ 19 files changed, 3159 insertions(+), 20 deletions(-) create mode 100644 cpp/cuopt_remote_server.cpp create mode 100644 cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp create mode 100755 cpp/scripts/run_dev_server.sh create mode 100644 cpp/src/linear_programming/utilities/cuopt_remote.proto create mode 100644 cpp/src/linear_programming/utilities/protobuf_serializer.cu create mode 100644 cpp/src/linear_programming/utilities/remote_solve.cu diff --git a/build.sh b/build.sh index 1ee8e87fc..fd85e6731 100755 --- a/build.sh +++ b/build.sh @@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt 
libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client cuopt_remote_server docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -25,6 +25,7 @@ HELP="$0 [ ...] [ ...] cuopt - build the cuopt Python package cuopt_server - build the cuopt_server Python package cuopt_sh_client - build cuopt self host client + cuopt_remote_server - build the cuopt remote solve server executable docs - build the docs deb - build deb package (requires libcuopt to be built first) and is: @@ -390,6 +391,30 @@ if buildAll || hasArg libcuopt; then fi fi +################################################################################ +# Build the cuopt remote solve server +if hasArg cuopt_remote_server; then + if [ ! -d "${LIBCUOPT_BUILD_DIR}" ]; then + echo "Error: libcuopt must be built before cuopt_remote_server. Run with 'libcuopt' target first." 
+ exit 1 + fi + cd "${LIBCUOPT_BUILD_DIR}" + + # Reconfigure with BUILD_REMOTE_SERVER=ON + cmake -DBUILD_REMOTE_SERVER=ON "${LIBCUOPT_BUILD_DIR}" + + # Build the server target + cmake --build "${LIBCUOPT_BUILD_DIR}" --target cuopt_remote_server ${VERBOSE_FLAG} -j"${PARALLEL_LEVEL}" + + # Install the server executable + if [ -z "${INSTALL_TARGET}" ]; then + echo "Skipping install of cuopt_remote_server (-n flag set)" + else + install -m 755 "${LIBCUOPT_BUILD_DIR}/cuopt_remote_server" "${INSTALL_PREFIX}/bin/" + echo "Installed cuopt_remote_server to ${INSTALL_PREFIX}/bin/" + fi +fi + ################################################################################ # Build deb package if hasArg deb; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fac00c66a..15be59b94 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -225,6 +225,27 @@ create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) find_package(CUDSS REQUIRED) +# Protocol Buffers for remote solve serialization +find_package(Protobuf REQUIRED) +include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Generate C++ code from .proto file +set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities/cuopt_remote.proto") +set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc") +set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h") + +add_custom_command( + OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} + ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} + --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities + ${PROTO_FILE} + DEPENDS ${PROTO_FILE} + COMMENT "Generating C++ code from cuopt_remote.proto" + VERBATIM +) + if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() @@ -236,6 +257,7 @@ if (HOST_LINEINFO) endif() add_library(cuopt SHARED ${CUOPT_SRC_FILES} + ${PROTO_SRCS} ) set_target_properties(cuopt @@ -341,10 +363,11 @@ 
target_link_libraries(cuopt rapids_logger::rapids_logger CCCL::CCCL raft::raft - cuopt::mps_parser ${CUDSS_LIB_FILE} + protobuf::libprotobuf PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} + cuopt::mps_parser_static # Static link - symbols embedded in libcuopt.so ) @@ -525,6 +548,83 @@ if(BUILD_LP_BENCHMARKS) endif() endif() +# ################################################################################################## +# - cuOpt Remote Server --------------------------------------------------------------------------- +option(BUILD_REMOTE_SERVER "Build cuOpt remote solve server" OFF) +if(BUILD_REMOTE_SERVER AND NOT BUILD_LP_ONLY) + add_executable(cuopt_remote_server cuopt_remote_server.cpp) + target_compile_options(cuopt_remote_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(cuopt_remote_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}" + PUBLIC + "$" + "$" + ) + target_link_libraries(cuopt_remote_server + PUBLIC + cuopt + OpenMP::OpenMP_CXX + protobuf::libprotobuf + PRIVATE + ) + # Use RUNPATH so LD_LIBRARY_PATH can override conda paths during development + set_target_properties(cuopt_remote_server PROPERTIES + SKIP_BUILD_RPATH OFF + BUILD_WITH_INSTALL_RPATH OFF + INSTALL_RPATH "$ORIGIN/../${lib_dir}" + ) + # Enable RUNPATH (new dtags) so LD_LIBRARY_PATH takes precedence + target_link_options(cuopt_remote_server PRIVATE -Wl,--enable-new-dtags) + + # Install the server executable + install(TARGETS cuopt_remote_server + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) + + # Standalone remote solve test client + add_executable(remote_solve_test tests/linear_programming/remote_solve_test.cpp) + target_compile_options(remote_solve_test + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(remote_solve_test + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + 
"${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}/include" + ) + target_link_libraries(remote_solve_test + PUBLIC + cuopt + ) + # Use RUNPATH so LD_LIBRARY_PATH can override during development + target_link_options(remote_solve_test PRIVATE -Wl,--enable-new-dtags) + + # Local solve test (for debugging) + add_executable(local_solve_test tests/linear_programming/local_solve_test.cpp) + target_compile_options(local_solve_test + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(local_solve_test + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}/include" + ) + target_link_libraries(local_solve_test + PUBLIC + cuopt + ) + target_link_options(local_solve_test PRIVATE -Wl,--enable-new-dtags) +endif() # ################################################################################################## # - CPack has to be the last item in the cmake file------------------------------------------------- diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index b6af636b7..4ff8c0fd8 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -285,6 +286,7 @@ int run_single_file(const std::string& file_path, CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what()); return -1; } + // Note: Solution output is now handled by solve_lp/solve_lp_remote via CUOPT_LOG_INFO } } catch (const std::exception& e) { CUOPT_LOG_ERROR("Error: %s", e.what()); diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp new file mode 100644 index 000000000..86e30a42b --- /dev/null +++ b/cpp/cuopt_remote_server.cpp @@ -0,0 +1,640 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file cuopt_remote_server.cpp + * @brief Remote solve server with sync and async support using pluggable serialization + * + * Features: + * - Sync mode: Submit job, wait for result, return immediately + * - Async mode: Submit job, get job_id, poll for status, retrieve result + * - Uses pluggable serialization (default: Protocol Buffers) + * - Threaded request handling + * - Real-time log streaming to client + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace cuopt::linear_programming; + +// Message types for streaming protocol +enum class MessageType : uint8_t { + LOG_MESSAGE = 0, // Log output from server + SOLUTION = 1, // Final solution data +}; + +// Helper to send a framed message with type +static bool send_typed_message(int sockfd, MessageType type, const void* data, size_t size) +{ + // Message format: [type:1][size:4][payload:size] + uint8_t msg_type = static_cast(type); + uint32_t payload_size = static_cast(size); + + // Write type + if (::write(sockfd, &msg_type, 1) != 1) return false; + // Write size + if (::write(sockfd, &payload_size, 4) != 4) return false; + // Write payload + if (size > 0) { + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) return false; + ptr += written; + remaining -= written; + } + } + return true; +} + +/** + * @brief RAII class to redirect stdout to a pipe and stream output to client + * + * This captures all stdout output from the solver and sends it to the client + * in real-time while also echoing to the original stdout (server console). 
+ */ +class stdout_streamer_t { + public: + stdout_streamer_t(int client_fd, bool enabled) + : client_fd_(client_fd), enabled_(enabled), running_(false), original_stdout_(-1) + { + if (!enabled_) return; + + // Flush any buffered stdout to prevent old content from being captured + fflush(stdout); + + // Create pipe + if (pipe(pipe_fds_) < 0) { + std::cerr << "[Server] Failed to create pipe for stdout streaming\n"; + enabled_ = false; + return; + } + + // Save original stdout + original_stdout_ = dup(STDOUT_FILENO); + if (original_stdout_ < 0) { + close(pipe_fds_[0]); + close(pipe_fds_[1]); + enabled_ = false; + return; + } + + // Redirect stdout to pipe + if (dup2(pipe_fds_[1], STDOUT_FILENO) < 0) { + close(original_stdout_); + close(pipe_fds_[0]); + close(pipe_fds_[1]); + enabled_ = false; + return; + } + + // Close write end of pipe (stdout now writes to it) + close(pipe_fds_[1]); + + // Start reader thread + running_ = true; + reader_thread_ = std::thread(&stdout_streamer_t::reader_loop, this); + } + + ~stdout_streamer_t() + { + if (!enabled_) return; + + // Flush stdout to ensure all output is in the pipe + fflush(stdout); + + // Restore original stdout + dup2(original_stdout_, STDOUT_FILENO); + close(original_stdout_); + + // Signal reader to stop and close pipe read end + running_ = false; + close(pipe_fds_[0]); + + // Wait for reader thread + if (reader_thread_.joinable()) { reader_thread_.join(); } + } + + private: + void reader_loop() + { + char buffer[4096]; + while (running_) { + ssize_t n = read(pipe_fds_[0], buffer, sizeof(buffer) - 1); + if (n <= 0) break; + + buffer[n] = '\0'; + + // Echo to original stdout (server console) + if (original_stdout_ >= 0) { write(original_stdout_, buffer, n); } + + // Send to client + send_typed_message(client_fd_, MessageType::LOG_MESSAGE, buffer, n); + } + } + + int client_fd_; + bool enabled_; + std::atomic running_; + int original_stdout_; + int pipe_fds_[2]; + std::thread reader_thread_; +}; + +// Job status +enum 
class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; + +// Job info for tracking +struct JobInfo { + std::string job_id; + JobStatus status; + std::chrono::steady_clock::time_point submit_time; + std::vector request_data; // Stored request + std::vector result_data; // Stored result + bool is_mip; // true for MIP, false for LP + std::string error_message; +}; + +// Global state +std::atomic keep_running{true}; +std::map job_tracker; +std::mutex tracker_mutex; +std::condition_variable job_cv; + +// Server configuration +struct ServerConfig { + int port = 9090; + int num_workers = 1; + bool verbose = true; + bool stream_logs = true; // Enable real-time log streaming to clients +}; + +ServerConfig config; + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "\n[Server] Received shutdown signal\n"; + keep_running = false; + job_cv.notify_all(); + } +} + +// Generate unique job ID +std::string generate_job_id() +{ + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_int_distribution dis; + + uint64_t id = dis(gen); + char buf[32]; + snprintf(buf, sizeof(buf), "job_%016lx", id); + return std::string(buf); +} + +// Socket helpers +static bool write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) return false; + ptr += written; + remaining -= written; + } + return true; +} + +static bool read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) return false; + ptr += nread; + remaining -= nread; + } + return true; +} + +static bool send_response(int sockfd, const std::vector& data) +{ + // Legacy response format (for non-streaming clients) + uint32_t size = 
static_cast(data.size()); + if (!write_all(sockfd, &size, sizeof(size))) return false; + if (!write_all(sockfd, data.data(), data.size())) return false; + return true; +} + +// Send solution using streaming protocol +static bool send_solution_message(int sockfd, const std::vector& data) +{ + return send_typed_message(sockfd, MessageType::SOLUTION, data.data(), data.size()); +} + +static bool receive_request(int sockfd, std::vector& data) +{ + uint32_t size; + if (!read_all(sockfd, &size, sizeof(size))) return false; + + // Sanity check + if (size > 100 * 1024 * 1024) { // Max 100MB + std::cerr << "[Server] Request too large: " << size << " bytes\n"; + return false; + } + + data.resize(size); + if (!read_all(sockfd, data.data(), size)) return false; + return true; +} + +// Worker thread - processes jobs from the queue +void worker_thread(int worker_id) +{ + std::cout << "[Worker " << worker_id << "] Started\n"; + + // Create RAFT handle for GPU operations + raft::handle_t handle; + + // Get serializer + auto serializer = get_serializer(); + + while (keep_running) { + std::string job_id; + std::vector request_data; + bool is_mip = false; + + // Find a queued job + { + std::unique_lock lock(tracker_mutex); + job_cv.wait(lock, []() { + if (!keep_running) return true; + for (const auto& [id, info] : job_tracker) { + if (info.status == JobStatus::QUEUED) return true; + } + return false; + }); + + if (!keep_running) break; + + // Find and claim a job + for (auto& [id, info] : job_tracker) { + if (info.status == JobStatus::QUEUED) { + info.status = JobStatus::PROCESSING; + job_id = id; + request_data = info.request_data; + is_mip = info.is_mip; + break; + } + } + } + + if (job_id.empty()) continue; + + if (config.verbose) { + std::cout << "[Worker " << worker_id << "] Processing job: " << job_id + << " (type: " << (is_mip ? 
"MIP" : "LP") << ")\n"; + } + + std::vector result_data; + std::string error_message; + bool success = false; + + try { + if (is_mip) { + // Deserialize MIP request + cuopt::mps_parser::mps_data_model_t mps_data; + mip_solver_settings_t settings; + + if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { + // Solve using the data model directly + auto solution = solve_mip(&handle, mps_data, settings); + + // Serialize result + result_data = serializer->serialize_mip_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize MIP request"; + } + } else { + // Deserialize LP request + cuopt::mps_parser::mps_data_model_t mps_data; + pdlp_solver_settings_t settings; + + if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { + // Debug: print deserialized data + std::cout << "[Server DEBUG] Deserialized LP problem:\n"; + std::cout << " Maximize: " << mps_data.get_sense() << "\n"; + std::cout << " Objective coeffs: ["; + for (size_t i = 0; i < mps_data.get_objective_coefficients().size(); ++i) { + std::cout << mps_data.get_objective_coefficients()[i]; + if (i + 1 < mps_data.get_objective_coefficients().size()) std::cout << ", "; + } + std::cout << "]\n"; + std::cout << " Constraint lower bounds: ["; + for (size_t i = 0; i < mps_data.get_constraint_lower_bounds().size(); ++i) { + std::cout << mps_data.get_constraint_lower_bounds()[i]; + if (i + 1 < mps_data.get_constraint_lower_bounds().size()) std::cout << ", "; + } + std::cout << "]\n"; + std::cout << " Constraint upper bounds: ["; + for (size_t i = 0; i < mps_data.get_constraint_upper_bounds().size(); ++i) { + std::cout << mps_data.get_constraint_upper_bounds()[i]; + if (i + 1 < mps_data.get_constraint_upper_bounds().size()) std::cout << ", "; + } + std::cout << "]\n"; + std::cout.flush(); + + // Solve using the data model directly + auto solution = solve_lp(&handle, mps_data, settings); + + // Serialize result + result_data = 
serializer->serialize_lp_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize LP request"; + } + } + } catch (const std::exception& e) { + error_message = std::string("Exception: ") + e.what(); + } + + // Update job status + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + if (success) { + it->second.status = JobStatus::COMPLETED; + it->second.result_data = std::move(result_data); + } else { + it->second.status = JobStatus::FAILED; + it->second.error_message = error_message; + } + } + } + job_cv.notify_all(); + + if (config.verbose) { + std::cout << "[Worker " << worker_id << "] Completed job: " << job_id + << " (success: " << success << ")\n"; + } + } + + std::cout << "[Worker " << worker_id << "] Stopped\n"; +} + +// Handle client connection with streaming log support +void handle_client(int client_fd, bool stream_logs) +{ + auto serializer = get_serializer(); + + // Receive request + std::vector request_data; + if (!receive_request(client_fd, request_data)) { + std::cerr << "[Server] Failed to receive request\n"; + close(client_fd); + return; + } + + if (config.verbose) { + std::cout << "[Server] Received request, size: " << request_data.size() << " bytes\n"; + } + + // Determine request type + bool is_mip = serializer->is_mip_request(request_data); + + // For sync mode with streaming, process directly in this thread + // to enable log streaming to the client + std::string job_id = generate_job_id(); + + if (config.verbose) { + std::cout << "[Server] Processing job: " << job_id << " (type: " << (is_mip ? "MIP" : "LP") + << ", streaming: " << (stream_logs ? "yes" : "no") << ")\n"; + } + + // Create RAFT handle for GPU operations + raft::handle_t handle; + + std::vector result_data; + std::string error_message; + bool success = false; + + // Use RAII stdout streamer - captures stdout and streams to client while + // also echoing to server console. 
Destructor restores original stdout. + { + stdout_streamer_t streamer(client_fd, stream_logs); + + try { + if (is_mip) { + cuopt::mps_parser::mps_data_model_t mps_data; + mip_solver_settings_t settings; + + if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { + auto solution = solve_mip(&handle, mps_data, settings); + result_data = serializer->serialize_mip_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize MIP request"; + } + } else { + cuopt::mps_parser::mps_data_model_t mps_data; + pdlp_solver_settings_t settings; + + if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { + auto solution = solve_lp(&handle, mps_data, settings); + result_data = serializer->serialize_lp_solution(solution); + success = true; + } else { + error_message = "Failed to deserialize LP request"; + } + } + } catch (const std::exception& e) { + error_message = std::string("Exception: ") + e.what(); + } + } // streamer destructor restores stdout + + if (config.verbose) { + std::cout << "[Server] Completed job: " << job_id << " (success: " << success << ")\n"; + } + + // Send response + if (stream_logs) { + // Use streaming protocol + if (!send_solution_message(client_fd, result_data)) { + std::cerr << "[Server] Failed to send solution message\n"; + } + } else { + // Use legacy protocol + if (!send_response(client_fd, result_data)) { + std::cerr << "[Server] Failed to send response\n"; + } + } + + close(client_fd); +} + +// Legacy handle_client without streaming (backward compatible) +void handle_client(int client_fd) { handle_client(client_fd, false); } + +void print_usage(const char* prog) +{ + std::cout << "Usage: " << prog << " [options]\n" + << "Options:\n" + << " -p PORT Port to listen on (default: 9090)\n" + << " -w NUM Number of worker threads (default: 1)\n" + << " -q Quiet mode (less verbose output)\n" + << " --no-stream Disable real-time log streaming to clients\n" + << " -h Show this help\n"; +} + 
+int main(int argc, char** argv) +{ + // Parse arguments + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) { + config.port = std::stoi(argv[++i]); + } else if (strcmp(argv[i], "-w") == 0 && i + 1 < argc) { + config.num_workers = std::stoi(argv[++i]); + } else if (strcmp(argv[i], "-q") == 0) { + config.verbose = false; + } else if (strcmp(argv[i], "--no-stream") == 0) { + config.stream_logs = false; + } else if (strcmp(argv[i], "-h") == 0) { + print_usage(argv[0]); + return 0; + } + } + + // Set up signal handlers + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + // IMPORTANT: Clear remote solve environment variables to prevent infinite recursion + // The server should always do local solves, never try to connect to itself + unsetenv("CUOPT_REMOTE_HOST"); + unsetenv("CUOPT_REMOTE_PORT"); + + std::cout << "=== cuOpt Remote Solve Server ===\n"; + std::cout << "Port: " << config.port << "\n"; + std::cout << "Workers: " << config.num_workers << "\n"; + std::cout << "Log streaming: " << (config.stream_logs ? 
"enabled" : "disabled") << "\n"; + + // Start worker threads + std::vector workers; + for (int i = 0; i < config.num_workers; ++i) { + workers.emplace_back(worker_thread, i); + } + + // Create server socket + int server_fd = socket(AF_INET, SOCK_STREAM, 0); + if (server_fd < 0) { + std::cerr << "[Server] Failed to create socket\n"; + return 1; + } + + // Allow address reuse + int opt = 1; + setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + // Bind + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + addr.sin_port = htons(config.port); + + if (bind(server_fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + std::cerr << "[Server] Failed to bind to port " << config.port << "\n"; + close(server_fd); + return 1; + } + + // Listen + if (listen(server_fd, 10) < 0) { + std::cerr << "[Server] Failed to listen\n"; + close(server_fd); + return 1; + } + + std::cout << "[Server] Listening on port " << config.port << "\n"; + + // Accept connections + while (keep_running) { + struct sockaddr_in client_addr; + socklen_t client_len = sizeof(client_addr); + + // Use select for timeout so we can check keep_running + fd_set read_fds; + FD_ZERO(&read_fds); + FD_SET(server_fd, &read_fds); + + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + + int ready = select(server_fd + 1, &read_fds, nullptr, nullptr, &tv); + if (ready < 0) { + if (errno == EINTR) continue; + std::cerr << "[Server] Select error\n"; + break; + } + if (ready == 0) continue; // Timeout + + int client_fd = accept(server_fd, (struct sockaddr*)&client_addr, &client_len); + if (client_fd < 0) { + if (errno == EINTR) continue; + std::cerr << "[Server] Accept error\n"; + continue; + } + + if (config.verbose) { + char client_ip[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &client_addr.sin_addr, client_ip, INET_ADDRSTRLEN); + std::cout << "[Server] Connection from " << client_ip << "\n"; + } + + // Handle in separate thread with 
streaming based on config + std::thread([client_fd]() { handle_client(client_fd, config.stream_logs); }).detach(); + } + + // Shutdown + std::cout << "[Server] Shutting down...\n"; + close(server_fd); + + // Wait for workers + job_cv.notify_all(); + for (auto& w : workers) { + if (w.joinable()) w.join(); + } + + std::cout << "[Server] Stopped\n"; + return 0; +} diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index 385b1e0b9..1315f5ec2 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -104,6 +104,81 @@ class mip_solution_t : public base_solution_t { void write_to_sol_file(std::string_view filename, rmm::cuda_stream_view stream_view) const; void log_summary() const; + //============================================================================ + // Setters for remote solve deserialization + //============================================================================ + + /** + * @brief Set the solution in host memory + * @param solution The solution vector + */ + void set_solution_host(std::vector solution); + + /** + * @brief Set the objective value + */ + void set_objective(f_t value); + + /** + * @brief Set the MIP gap + */ + void set_mip_gap(f_t value); + + /** + * @brief Set the solution bound + */ + void set_solution_bound(f_t value); + + /** + * @brief Set total solve time + */ + void set_total_solve_time(double value); + + /** + * @brief Set presolve time + */ + void set_presolve_time(double value); + + /** + * @brief Set max constraint violation + */ + void set_max_constraint_violation(f_t value); + + /** + * @brief Set max integer violation + */ + void set_max_int_violation(f_t value); + + /** + * @brief Set max variable bound violation + */ + void set_max_variable_bound_violation(f_t value); + + /** + * @brief Set number of nodes + */ + void set_nodes(i_t value); + + /** + 
* @brief Set number of simplex iterations + */ + void set_simplex_iterations(i_t value); + + /** + * @brief Get error string + */ + std::string get_error_string() const; + + /** + * @brief Get number of nodes + */ + i_t get_nodes() const; + + /** + * @brief Get number of simplex iterations + */ + i_t get_simplex_iterations() const; + private: // GPU (device) storage - populated for local GPU solves std::unique_ptr> solution_; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index 1dbac6933..d975ea11e 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -326,6 +326,128 @@ class optimization_problem_solution_t : public base_solution_t { pdlp_warm_start_data_t& get_pdlp_warm_start_data(); + //============================================================================ + // Setters for host solution data (used by remote solve deserialization) + //============================================================================ + + /** + * @brief Set the primal solution in host memory + * @param solution The primal solution vector + */ + void set_primal_solution_host(std::vector solution); + + /** + * @brief Set the dual solution in host memory + * @param solution The dual solution vector + */ + void set_dual_solution_host(std::vector solution); + + /** + * @brief Set the reduced cost in host memory + * @param reduced_cost The reduced cost vector + */ + void set_reduced_cost_host(std::vector reduced_cost); + + /** + * @brief Set the termination statistics + * @param stats The termination statistics + */ + void set_termination_stats(const additional_termination_information_t& stats); + + //============================================================================ + // Getters for termination statistics + //============================================================================ + + /** + * 
@brief Get the L2 primal residual + * @return L2 primal residual + */ + f_t get_l2_primal_residual() const; + + /** + * @brief Get the L2 dual residual + * @return L2 dual residual + */ + f_t get_l2_dual_residual() const; + + /** + * @brief Get the primal objective value + * @return Primal objective + */ + f_t get_primal_objective() const; + + /** + * @brief Get the dual objective value + * @return Dual objective + */ + f_t get_dual_objective() const; + + /** + * @brief Get the duality gap + * @return Gap + */ + f_t get_gap() const; + + /** + * @brief Get number of iterations + * @return Number of iterations + */ + i_t get_nb_iterations() const; + + /** + * @brief Check if solved by PDLP + * @return true if solved by PDLP + */ + bool get_solved_by_pdlp() const; + + /** + * @brief Set L2 primal residual + * @param value The value + */ + void set_l2_primal_residual(f_t value); + + /** + * @brief Set L2 dual residual + * @param value The value + */ + void set_l2_dual_residual(f_t value); + + /** + * @brief Set primal objective + * @param value The value + */ + void set_primal_objective(f_t value); + + /** + * @brief Set dual objective + * @param value The value + */ + void set_dual_objective(f_t value); + + /** + * @brief Set gap + * @param value The value + */ + void set_gap(f_t value); + + /** + * @brief Set number of iterations + * @param value The value + */ + void set_nb_iterations(i_t value); + + /** + * @brief Set solved by PDLP flag + * @param value The value + */ + void set_solved_by_pdlp(bool value); + + /** + * @brief Get error string + * @return Error message string + */ + std::string get_error_string() const; + /** * @brief Writes the solver_solution object as a JSON object to the 'filename' file using * 'stream_view' to transfer the data from device to host before it is written to the file. 
@@ -369,6 +491,9 @@ class optimization_problem_solution_t : public base_solution_t { // Flag indicating where solution data is stored bool is_device_memory_ = true; + // Flag indicating if solved by PDLP (vs dual simplex) + bool solved_by_pdlp_ = true; + pdlp_warm_start_data_t pdlp_warm_start_data_; pdlp_termination_status_t termination_status_; diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp new file mode 100644 index 000000000..b74fdb35c --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -0,0 +1,288 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Abstract interface for serializing/deserializing cuOpt problems and solutions. + * + * This interface allows users to provide custom serialization implementations + * for different wire formats (protobuf, JSON, msgpack, custom binary, etc.). + * + * The default implementation uses Protocol Buffers and is built into libcuopt. + * Users can provide their own implementation by: + * 1. Implementing this interface + * 2. Compiling to a shared library + * 3. Setting CUOPT_SERIALIZER_LIB environment variable to the library path + * + * @tparam i_t Index type (int32_t or int64_t) + * @tparam f_t Float type (float or double) + */ +template +class remote_serializer_t { + public: + virtual ~remote_serializer_t() = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + /** + * @brief Serialize an LP problem with settings to a byte buffer. 
+ * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Serialize a MIP problem with settings to a byte buffer. + * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) = 0; + + //============================================================================ + // Solution Deserialization + //============================================================================ + + /** + * @brief Deserialize an LP solution from a byte buffer. + * + * @param data The serialized solution bytes received from the server + * @return The deserialized LP solution object + */ + virtual optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) = 0; + + /** + * @brief Deserialize a MIP solution from a byte buffer. + * + * @param data The serialized solution bytes received from the server + * @return The deserialized MIP solution object + */ + virtual mip_solution_t deserialize_mip_solution(const std::vector& data) = 0; + + //============================================================================ + // Server-side: Request Deserialization & Response Serialization + //============================================================================ + + /** + * @brief Check if serialized data is an LP or MIP request. 
+ * + * @param data The serialized request bytes + * @return true if MIP request, false if LP request + */ + virtual bool is_mip_request(const std::vector& data) = 0; + + /** + * @brief Deserialize an LP request (problem + settings) from bytes. + * + * This is used by the server to receive problems from clients. + * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_lp_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Deserialize a MIP request (problem + settings) from bytes. + * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_mip_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + mip_solver_settings_t& settings) = 0; + + /** + * @brief Serialize an LP solution to bytes for sending to client. + * + * @param solution The LP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) = 0; + + /** + * @brief Serialize a MIP solution to bytes for sending to client. + * + * @param solution The MIP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_mip_solution(const mip_solution_t& solution) = 0; + + //============================================================================ + // Async Operations + //============================================================================ + + /** + * @brief Serialize a job submission request with async options. 
+ * + * @param view Problem data + * @param settings LP solver settings + * @param blocking If true, server should wait and return result synchronously + * @return Serialized async request bytes + */ + virtual std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) = 0; + + /** + * @brief Serialize a job submission request with async options. + * + * @param view Problem data + * @param settings MIP solver settings + * @param blocking If true, server should wait and return result synchronously + * @return Serialized async request bytes + */ + virtual std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) = 0; + + /** + * @brief Serialize a status check request. + * + * @param job_id The job ID to check + * @return Serialized request bytes + */ + virtual std::vector serialize_status_request(const std::string& job_id) = 0; + + /** + * @brief Serialize a get result request. + * + * @param job_id The job ID to get results for + * @return Serialized request bytes + */ + virtual std::vector serialize_get_result_request(const std::string& job_id) = 0; + + /** + * @brief Serialize a delete request. + * + * @param job_id The job ID to delete + * @return Serialized request bytes + */ + virtual std::vector serialize_delete_request(const std::string& job_id) = 0; + + /** + * @brief Job status enumeration. + */ + enum class job_status_t { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; + + /** + * @brief Deserialize job submission response. 
+ * + * @param data Response bytes + * @param[out] job_id Job ID assigned by server (on success) + * @param[out] error_message Error message (on failure) + * @return true if submission succeeded + */ + virtual bool deserialize_submit_response(const std::vector& data, + std::string& job_id, + std::string& error_message) = 0; + + /** + * @brief Deserialize status check response. + * + * @param data Response bytes + * @return Job status + */ + virtual job_status_t deserialize_status_response(const std::vector& data) = 0; + + /** + * @brief Deserialize result response as LP solution. + * + * @param data Response bytes + * @return LP solution, or error solution if failed + */ + virtual optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) = 0; + + /** + * @brief Deserialize result response as MIP solution. + * + * @param data Response bytes + * @return MIP solution, or error solution if failed + */ + virtual mip_solution_t deserialize_mip_result_response( + const std::vector& data) = 0; + + //============================================================================ + // Metadata + //============================================================================ + + /** + * @brief Get the serialization format name (for logging/debugging). + * + * @return Format name string (e.g., "protobuf", "json", "msgpack") + */ + virtual std::string format_name() const = 0; + + /** + * @brief Get version of the serialization protocol. + * + * @return Protocol version number + */ + virtual uint32_t protocol_version() const = 0; +}; + +/** + * @brief Factory function type for creating serializer instances. + * + * Custom serializer libraries must export a function with this signature + * named "create_cuopt_serializer". + */ +template +using serializer_factory_t = std::unique_ptr> (*)(); + +/** + * @brief Get the default (protobuf) serializer instance. 
+ * + * @return Shared pointer to the default serializer + */ +template +std::shared_ptr> get_default_serializer(); + +/** + * @brief Get the currently configured serializer. + * + * Returns the custom serializer if CUOPT_SERIALIZER_LIB is set, + * otherwise returns the default protobuf serializer. + * + * @return Shared pointer to the serializer + */ +template +std::shared_ptr> get_serializer(); + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp index dab725823..3d1a1d1ec 100644 --- a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -1,12 +1,18 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include +#include +#include +#include +#include + #include #include #include @@ -54,4 +60,36 @@ inline std::optional get_remote_solve_config() */ inline bool is_remote_solve_enabled() { return get_remote_solve_config().has_value(); } +/** + * @brief Solve an LP problem on a remote server. + * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +/** + * @brief Solve a MIP problem on a remote server. 
+ * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); + } // namespace cuopt::linear_programming diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 9c96cc911..dc2d271ea 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -64,7 +64,8 @@ if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() -add_library(mps_parser SHARED +# Source files for mps_parser +set(MPS_PARSER_SOURCES src/data_model_view.cpp src/mps_data_model.cpp src/mps_parser.cpp @@ -74,6 +75,12 @@ add_library(mps_parser SHARED src/utilities/cython_mps_parser.cpp ) +# Shared library for standalone use +add_library(mps_parser SHARED ${MPS_PARSER_SOURCES}) + +# Static library for linking into libcuopt +add_library(mps_parser_static STATIC ${MPS_PARSER_SOURCES}) + set_target_properties(mps_parser PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" @@ -84,10 +91,21 @@ set_target_properties(mps_parser INTERFACE_POSITION_INDEPENDENT_CODE ON ) +set_target_properties(mps_parser_static + PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON +) + target_compile_options(mps_parser PRIVATE "$<$:${MPS_PARSER_CXX_FLAGS}>" ) +target_compile_options(mps_parser_static + PRIVATE "$<$:${MPS_PARSER_CXX_FLAGS}>" +) + if(WRITE_FATBIN) 
file(WRITE "${MPS_PARSER_BINARY_DIR}/fatbin.ld" [=[ @@ -101,6 +119,7 @@ if(WRITE_FATBIN) endif() add_library(cuopt::mps_parser ALIAS mps_parser) +add_library(cuopt::mps_parser_static ALIAS mps_parser_static) # ################################################################################################## # - include paths --------------------------------------------------------------------------------- @@ -113,6 +132,15 @@ target_include_directories(mps_parser "$" ) +target_include_directories(mps_parser_static + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" +) + if(MPS_PARSER_WITH_BZIP2) target_include_directories(mps_parser PRIVATE BZip2::BZip2) endif(MPS_PARSER_WITH_BZIP2) diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index 7db2b390c..8be1b899a 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,8 @@ #include #include +#include + namespace cuopt::mps_parser { template @@ -348,7 +350,8 @@ bool data_model_view_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class data_model_view_t; - template class data_model_view_t; +template class data_model_view_t; +template class data_model_view_t; } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/mps_data_model.cpp b/cpp/libmps_parser/src/mps_data_model.cpp index 7d0d44a03..605d5cef6 100644 --- a/cpp/libmps_parser/src/mps_data_model.cpp +++ b/cpp/libmps_parser/src/mps_data_model.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::mps_parser { @@ -462,8 +463,9 @@ bool mps_data_model_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class mps_data_model_t; - template class mps_data_model_t; +template class mps_data_model_t; +template class mps_data_model_t; // TODO current raft to cusparse wrappers only support int64_t // can be CUSPARSE_INDEX_16U, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_64I diff --git a/cpp/scripts/run_dev_server.sh b/cpp/scripts/run_dev_server.sh new file mode 100755 index 000000000..cc6f51d90 --- /dev/null +++ b/cpp/scripts/run_dev_server.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# Development script to run the cuopt remote server with the correct library path +# This is only needed during development when the build directory's libcuopt.so +# needs to take precedence over the conda-installed version. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BUILD_DIR="${SCRIPT_DIR}/../build" + +if [ ! -f "${BUILD_DIR}/cuopt_remote_server" ]; then + echo "Error: cuopt_remote_server not found. Please build with -DBUILD_REMOTE_SERVER=ON" + exit 1 +fi + +export LD_LIBRARY_PATH="${BUILD_DIR}:${LD_LIBRARY_PATH}" + +echo "Starting cuopt remote server..." +echo "Build directory: ${BUILD_DIR}" +echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" +echo "---" + +exec "${BUILD_DIR}/cuopt_remote_server" "$@" diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt index c3f673e03..c57f61930 100644 --- a/cpp/src/linear_programming/CMakeLists.txt +++ b/cpp/src/linear_programming/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -8,6 +8,8 @@ set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/protobuf_serializer.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/remote_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdhg.cu diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 73407ee1f..0f57a8660 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1420,11 +1421,8 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), remote_config->port); - // TODO: Implement remote solve - serialize cpu_view and send to remote server - CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); - // Use CPU-only error constructor - return optimization_problem_solution_t( - cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); + // Call the remote solve function with CPU-side view + return solve_lp_remote(*remote_config, cpu_view, settings); } // Local solve: data already on GPU - convert view to optimization_problem_t and solve @@ -1437,11 +1435,8 @@ optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), remote_config->port); - // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server - CUOPT_LOG_ERROR("[solve_lp] Remote solve not yet implemented"); - // Use CPU-only error constructor - return optimization_problem_solution_t( - 
cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); + // Call the remote solve function + return solve_lp_remote(*remote_config, view, settings); } // Local solve with CPU data: copy to GPU and solve diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index 027003fb1..79dfdb774 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -501,6 +501,136 @@ optimization_problem_solution_t::get_pdlp_warm_start_data() return pdlp_warm_start_data_; } +//============================================================================ +// Setters for host solution data +//============================================================================ + +template +void optimization_problem_solution_t::set_primal_solution_host(std::vector solution) +{ + primal_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_dual_solution_host(std::vector solution) +{ + dual_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_reduced_cost_host(std::vector reduced_cost) +{ + reduced_cost_host_ = std::make_unique>(std::move(reduced_cost)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_termination_stats( + const additional_termination_information_t& stats) +{ + termination_stats_ = stats; +} + +//============================================================================ +// Getters for termination statistics +//============================================================================ + +template +f_t optimization_problem_solution_t::get_l2_primal_residual() const +{ + return termination_stats_.l2_primal_residual; +} + +template +f_t optimization_problem_solution_t::get_l2_dual_residual() const +{ + return 
termination_stats_.l2_dual_residual; +} + +template +f_t optimization_problem_solution_t::get_primal_objective() const +{ + return termination_stats_.primal_objective; +} + +template +f_t optimization_problem_solution_t::get_dual_objective() const +{ + return termination_stats_.dual_objective; +} + +template +f_t optimization_problem_solution_t::get_gap() const +{ + return termination_stats_.gap; +} + +template +i_t optimization_problem_solution_t::get_nb_iterations() const +{ + return termination_stats_.number_of_steps_taken; +} + +template +bool optimization_problem_solution_t::get_solved_by_pdlp() const +{ + return solved_by_pdlp_; +} + +//============================================================================ +// Setters for termination statistics +//============================================================================ + +template +void optimization_problem_solution_t::set_l2_primal_residual(f_t value) +{ + termination_stats_.l2_primal_residual = value; +} + +template +void optimization_problem_solution_t::set_l2_dual_residual(f_t value) +{ + termination_stats_.l2_dual_residual = value; +} + +template +void optimization_problem_solution_t::set_primal_objective(f_t value) +{ + termination_stats_.primal_objective = value; +} + +template +void optimization_problem_solution_t::set_dual_objective(f_t value) +{ + termination_stats_.dual_objective = value; +} + +template +void optimization_problem_solution_t::set_gap(f_t value) +{ + termination_stats_.gap = value; +} + +template +void optimization_problem_solution_t::set_nb_iterations(i_t value) +{ + termination_stats_.number_of_steps_taken = value; +} + +template +void optimization_problem_solution_t::set_solved_by_pdlp(bool value) +{ + solved_by_pdlp_ = value; +} + +template +std::string optimization_problem_solution_t::get_error_string() const +{ + return error_status_.what(); +} + template void optimization_problem_solution_t::write_to_sol_file( std::string_view filename, rmm::cuda_stream_view 
stream_view) const diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto new file mode 100644 index 000000000..faf93e094 --- /dev/null +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -0,0 +1,289 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +syntax = "proto3"; + +package cuopt.remote; + +// Protocol version and metadata +message RequestHeader { + uint32 version = 1; // Protocol version (currently 1) + ProblemType problem_type = 2; // LP or MIP + IndexType index_type = 3; // INT32 or INT64 + FloatType float_type = 4; // FLOAT32 or DOUBLE +} + +enum ProblemType { + LP = 0; + MIP = 1; +} + +enum IndexType { + INT32 = 0; + INT64 = 1; +} + +enum FloatType { + FLOAT32 = 0; + DOUBLE = 1; +} + +// Optimization problem representation (matches optimization_problem_t) +message OptimizationProblem { + // Problem metadata + string problem_name = 1; + string objective_name = 2; + bool maximize = 3; + double objective_scaling_factor = 4; + double objective_offset = 5; + ProblemCategory problem_category = 6; + + // Variable names (optional, can be large) + repeated string variable_names = 7; + repeated string row_names = 8; + + // Constraint matrix in CSR format + repeated double constraint_matrix_values = 10; + repeated int32 constraint_matrix_indices = 11; + repeated int32 constraint_matrix_offsets = 12; + + // Problem vectors + repeated double objective_coefficients = 20; + repeated double constraint_bounds = 21; + repeated double variable_lower_bounds = 22; + repeated double variable_upper_bounds = 23; + + // Constraint bounds (two representations supported by cuOpt) + repeated double constraint_lower_bounds = 24; + repeated double constraint_upper_bounds = 25; + bytes row_types = 26; // char array of constraint types ('<', '>', '=') + + // Integer/binary variable indicators (for MIP) + 
repeated bool is_integer = 30; + repeated bool is_binary = 31; +} + +enum ProblemCategory { + LP_PROBLEM = 0; + MIP_PROBLEM = 1; + IP_PROBLEM = 2; +} + +// PDLP solver settings +message PDLPSolverSettings { + // Termination criteria + double eps_optimal_absolute = 1; + double eps_optimal_relative = 2; + double eps_primal_infeasible = 3; + double eps_dual_infeasible = 4; + int32 time_sec_limit = 5; + int32 iteration_limit = 6; + + // Algorithm parameters + double initial_primal_weight = 10; + double initial_step_size = 11; + int32 verbosity = 12; + + // Warm start data (if provided) + PDLPWarmStartData warm_start_data = 20; +} + +message PDLPWarmStartData { + repeated double current_primal_solution = 1; + repeated double current_dual_solution = 2; + repeated double initial_primal_average = 3; + repeated double initial_dual_average = 4; + repeated double current_ATY = 5; + repeated double sum_primal_solutions = 6; + repeated double sum_dual_solutions = 7; + repeated double last_restart_duality_gap_primal_solution = 8; + repeated double last_restart_duality_gap_dual_solution = 9; + + double initial_primal_weight = 10; + double initial_step_size = 11; + int32 total_pdlp_iterations = 12; + int32 total_pdhg_iterations = 13; + double last_candidate_kkt_score = 14; + double last_restart_kkt_score = 15; + double sum_solution_weight = 16; + int32 iterations_since_last_restart = 17; +} + +// MIP solver settings +message MIPSolverSettings { + double time_limit = 1; + double mip_gap = 2; + int32 verbosity = 3; + // Additional MIP settings can be added here +} + +// LP solve request +message SolveLPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + PDLPSolverSettings settings = 3; +} + +// MIP solve request +message SolveMIPRequest { + RequestHeader header = 1; + OptimizationProblem problem = 2; + MIPSolverSettings settings = 3; +} + +// LP solution +message LPSolution { + // Solution vectors + repeated double primal_solution = 1; + repeated double 
dual_solution = 2; + repeated double reduced_cost = 3; + + // Warm start data for next solve + PDLPWarmStartData warm_start_data = 4; + + // Termination information + PDLPTerminationStatus termination_status = 10; + string error_message = 11; + + // Solution statistics + double l2_primal_residual = 20; + double l2_dual_residual = 21; + double primal_objective = 22; + double dual_objective = 23; + double gap = 24; + int32 nb_iterations = 25; + double solve_time = 26; + bool solved_by_pdlp = 27; +} + +enum PDLPTerminationStatus { + PDLP_NO_TERMINATION = 0; + PDLP_NUMERICAL_ERROR = 1; + PDLP_OPTIMAL = 2; + PDLP_PRIMAL_INFEASIBLE = 3; + PDLP_DUAL_INFEASIBLE = 4; + PDLP_ITERATION_LIMIT = 5; + PDLP_TIME_LIMIT = 6; + PDLP_CONCURRENT_LIMIT = 7; + PDLP_PRIMAL_FEASIBLE = 8; +} + +// MIP solution +message MIPSolution { + repeated double solution = 1; + + MIPTerminationStatus termination_status = 10; + string error_message = 11; + + double objective = 20; + double mip_gap = 21; + double solution_bound = 22; + double total_solve_time = 23; + double presolve_time = 24; + double max_constraint_violation = 25; + double max_int_violation = 26; + double max_variable_bound_violation = 27; + int32 nodes = 28; + int32 simplex_iterations = 29; +} + +enum MIPTerminationStatus { + MIP_NO_TERMINATION = 0; + MIP_OPTIMAL = 1; + MIP_FEASIBLE_FOUND = 2; + MIP_INFEASIBLE = 3; + MIP_UNBOUNDED = 4; + MIP_TIME_LIMIT = 5; +} + +// Request types for async operations +enum RequestType { + SUBMIT_JOB = 0; // Submit a new solve job + CHECK_STATUS = 1; // Check job status + GET_RESULT = 2; // Retrieve completed result + DELETE_RESULT = 3; // Delete result from server +} + +// Job status for async operations +enum JobStatus { + QUEUED = 0; // Job submitted, waiting in queue + PROCESSING = 1; // Job currently being solved + COMPLETED = 2; // Job completed successfully + FAILED = 3; // Job failed with error + NOT_FOUND = 4; // Job ID not found +} + +// Generic request wrapper for async operations +message 
AsyncRequest { + RequestType request_type = 1; + string job_id = 2; // For status/get/delete operations + bool blocking = 3; // If true, server waits and returns solution (sync mode) + + // For SUBMIT_JOB requests + oneof job_data { + SolveLPRequest lp_request = 10; + SolveMIPRequest mip_request = 11; + } +} + +// Response for job submission +message SubmitResponse { + ResponseStatus status = 1; + string job_id = 2; // Unique job identifier + string message = 3; // Success/error message +} + +// Response for status check +message StatusResponse { + JobStatus job_status = 1; + string message = 2; + double progress = 3; // 0.0-1.0 (future enhancement) +} + +// Response for get result +message ResultResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +// Response for delete +message DeleteResponse { + ResponseStatus status = 1; + string message = 2; +} + +// Generic response wrapper +message AsyncResponse { + RequestType request_type = 1; + + oneof response_data { + SubmitResponse submit_response = 10; + StatusResponse status_response = 11; + ResultResponse result_response = 12; + DeleteResponse delete_response = 13; + } +} + +// Legacy synchronous response (for backward compatibility) +message SolveResponse { + ResponseStatus status = 1; + string error_message = 2; + + oneof solution { + LPSolution lp_solution = 10; + MIPSolution mip_solution = 11; + } +} + +enum ResponseStatus { + SUCCESS = 0; + ERROR_INVALID_REQUEST = 1; + ERROR_SOLVE_FAILED = 2; + ERROR_INTERNAL = 3; + ERROR_NOT_FOUND = 4; +} diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu new file mode 100644 index 000000000..3192c894b --- /dev/null +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -0,0 +1,924 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include + +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// Convert cuOpt termination status to protobuf enum +cuopt::remote::PDLPTerminationStatus to_proto_status(pdlp_termination_status_t status) +{ + switch (status) { + case pdlp_termination_status_t::NoTermination: return cuopt::remote::PDLP_NO_TERMINATION; + case pdlp_termination_status_t::NumericalError: return cuopt::remote::PDLP_NUMERICAL_ERROR; + case pdlp_termination_status_t::Optimal: return cuopt::remote::PDLP_OPTIMAL; + case pdlp_termination_status_t::PrimalInfeasible: return cuopt::remote::PDLP_PRIMAL_INFEASIBLE; + case pdlp_termination_status_t::DualInfeasible: return cuopt::remote::PDLP_DUAL_INFEASIBLE; + case pdlp_termination_status_t::IterationLimit: return cuopt::remote::PDLP_ITERATION_LIMIT; + case pdlp_termination_status_t::TimeLimit: return cuopt::remote::PDLP_TIME_LIMIT; + case pdlp_termination_status_t::ConcurrentLimit: return cuopt::remote::PDLP_CONCURRENT_LIMIT; + case pdlp_termination_status_t::PrimalFeasible: return cuopt::remote::PDLP_PRIMAL_FEASIBLE; + default: return cuopt::remote::PDLP_NO_TERMINATION; + } +} + +// Convert protobuf enum to cuOpt termination status +pdlp_termination_status_t from_proto_status(cuopt::remote::PDLPTerminationStatus status) +{ + switch (status) { + case cuopt::remote::PDLP_NO_TERMINATION: return pdlp_termination_status_t::NoTermination; + case cuopt::remote::PDLP_NUMERICAL_ERROR: return pdlp_termination_status_t::NumericalError; + case cuopt::remote::PDLP_OPTIMAL: return pdlp_termination_status_t::Optimal; + case cuopt::remote::PDLP_PRIMAL_INFEASIBLE: return pdlp_termination_status_t::PrimalInfeasible; + case cuopt::remote::PDLP_DUAL_INFEASIBLE: return pdlp_termination_status_t::DualInfeasible; + case cuopt::remote::PDLP_ITERATION_LIMIT: return pdlp_termination_status_t::IterationLimit; + case cuopt::remote::PDLP_TIME_LIMIT: 
return pdlp_termination_status_t::TimeLimit; + case cuopt::remote::PDLP_CONCURRENT_LIMIT: return pdlp_termination_status_t::ConcurrentLimit; + case cuopt::remote::PDLP_PRIMAL_FEASIBLE: return pdlp_termination_status_t::PrimalFeasible; + default: return pdlp_termination_status_t::NoTermination; + } +} + +// Convert MIP termination status +cuopt::remote::MIPTerminationStatus to_proto_mip_status(mip_termination_status_t status) +{ + switch (status) { + case mip_termination_status_t::NoTermination: return cuopt::remote::MIP_NO_TERMINATION; + case mip_termination_status_t::Optimal: return cuopt::remote::MIP_OPTIMAL; + case mip_termination_status_t::FeasibleFound: return cuopt::remote::MIP_FEASIBLE_FOUND; + case mip_termination_status_t::Infeasible: return cuopt::remote::MIP_INFEASIBLE; + case mip_termination_status_t::Unbounded: return cuopt::remote::MIP_UNBOUNDED; + case mip_termination_status_t::TimeLimit: return cuopt::remote::MIP_TIME_LIMIT; + default: return cuopt::remote::MIP_NO_TERMINATION; + } +} + +mip_termination_status_t from_proto_mip_status(cuopt::remote::MIPTerminationStatus status) +{ + switch (status) { + case cuopt::remote::MIP_NO_TERMINATION: return mip_termination_status_t::NoTermination; + case cuopt::remote::MIP_OPTIMAL: return mip_termination_status_t::Optimal; + case cuopt::remote::MIP_FEASIBLE_FOUND: return mip_termination_status_t::FeasibleFound; + case cuopt::remote::MIP_INFEASIBLE: return mip_termination_status_t::Infeasible; + case cuopt::remote::MIP_UNBOUNDED: return mip_termination_status_t::Unbounded; + case cuopt::remote::MIP_TIME_LIMIT: return mip_termination_status_t::TimeLimit; + default: return mip_termination_status_t::NoTermination; + } +} + +} // namespace + +/** + * @brief Default Protocol Buffers serializer implementation. 
+ */ +template +class protobuf_serializer_t : public remote_serializer_t { + public: + using job_status_t = typename remote_serializer_t::job_status_t; + + protobuf_serializer_t() = default; + ~protobuf_serializer_t() override = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + cuopt::remote::SolveLPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize settings + serialize_lp_settings_to_proto(settings, request.mutable_settings()); + + // Serialize to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + cuopt::remote::SolveMIPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::MIP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize settings + auto* pb_settings = request.mutable_settings(); + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_mip_gap(settings.tolerances.relative_mip_gap); + // Note: verbosity not directly available in mip_solver_settings_t + pb_settings->set_verbosity(0); + + // Serialize to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Solution Deserialization + //============================================================================ + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + cuopt::remote::LPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return optimization_problem_solution_t( + cuopt::logic_error("Failed to parse LP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_lp_solution(pb_solution); + } + + mip_solution_t deserialize_mip_solution(const std::vector& data) override + { + cuopt::remote::MIPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return mip_solution_t( + cuopt::logic_error("Failed to parse MIP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_mip_solution(pb_solution); + } + + //============================================================================ + // Server-side Operations + //============================================================================ + + bool is_mip_request(const std::vector& data) override + { + // Try to parse as async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size())) { + return async_request.has_mip_request(); + } + + // Try LP 
request + cuopt::remote::SolveLPRequest lp_request; + if (lp_request.ParseFromArray(data.data(), data.size())) { return false; } + + // Try MIP request + cuopt::remote::SolveMIPRequest mip_request; + if (mip_request.ParseFromArray(data.data(), data.size())) { return true; } + + return false; // Default to LP if can't determine + } + + bool deserialize_lp_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + // Try async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size()) && async_request.has_lp_request()) { + const auto& lp_request = async_request.lp_request(); + proto_to_mps_data(lp_request.problem(), mps_data); + proto_to_lp_settings(lp_request.settings(), settings); + return true; + } + + // Try direct LP request + cuopt::remote::SolveLPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse LP request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + proto_to_lp_settings(request.settings(), settings); + return true; + } + + bool deserialize_mip_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Try async request first + cuopt::remote::AsyncRequest async_request; + if (async_request.ParseFromArray(data.data(), data.size()) && async_request.has_mip_request()) { + const auto& mip_request = async_request.mip_request(); + proto_to_mps_data(mip_request.problem(), mps_data); + proto_to_mip_settings(mip_request.settings(), settings); + return true; + } + + // Try direct MIP request + cuopt::remote::SolveMIPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse MIP request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + 
proto_to_mip_settings(request.settings(), settings); + return true; + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + cuopt::remote::LPSolution pb_solution; + lp_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_solution(const mip_solution_t& solution) override + { + cuopt::remote::MIPSolution pb_solution; + mip_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Async Operations + //============================================================================ + + std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::SUBMIT_JOB); + request.set_blocking(blocking); + + auto* lp_request = request.mutable_lp_request(); + + // Set header + auto* header = lp_request->mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + serialize_problem_to_proto(view, lp_request->mutable_problem()); + serialize_lp_settings_to_proto(settings, lp_request->mutable_settings()); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::SUBMIT_JOB); + request.set_blocking(blocking); + + auto* mip_request = request.mutable_mip_request(); + + // Set header + auto* header = mip_request->mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::MIP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + serialize_problem_to_proto(view, mip_request->mutable_problem()); + + auto* pb_settings = mip_request->mutable_settings(); + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_mip_gap(settings.tolerances.relative_mip_gap); + pb_settings->set_verbosity(0); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_status_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::CHECK_STATUS); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_get_result_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::GET_RESULT); + request.set_job_id(job_id); + + std::vector 
result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_delete_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::DELETE_RESULT); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + bool deserialize_submit_response(const std::vector& data, + std::string& job_id, + std::string& error_message) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + error_message = "Failed to parse submit response"; + return false; + } + + if (!response.has_submit_response()) { + error_message = "Response is not a submit response"; + return false; + } + + const auto& submit = response.submit_response(); + if (submit.status() != cuopt::remote::SUCCESS) { + error_message = submit.message(); + return false; + } + + job_id = submit.job_id(); + return true; + } + + job_status_t deserialize_status_response(const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_status_response()) { + return job_status_t::NOT_FOUND; + } + + switch (response.status_response().job_status()) { + case cuopt::remote::QUEUED: return job_status_t::QUEUED; + case cuopt::remote::PROCESSING: return job_status_t::PROCESSING; + case cuopt::remote::COMPLETED: return job_status_t::COMPLETED; + case cuopt::remote::FAILED: return job_status_t::FAILED; + case cuopt::remote::NOT_FOUND: + default: return job_status_t::NOT_FOUND; + } + } + + optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + return optimization_problem_solution_t( + 
cuopt::logic_error("Failed to parse result response", cuopt::error_type_t::RuntimeError)); + } + + if (!response.has_result_response()) { + return optimization_problem_solution_t( + cuopt::logic_error("Response is not a result response", cuopt::error_type_t::RuntimeError)); + } + + const auto& result = response.result_response(); + if (result.status() != cuopt::remote::SUCCESS) { + return optimization_problem_solution_t( + cuopt::logic_error(result.error_message(), cuopt::error_type_t::RuntimeError)); + } + + if (!result.has_lp_solution()) { + return optimization_problem_solution_t(cuopt::logic_error( + "Response does not contain LP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_lp_solution(result.lp_solution()); + } + + mip_solution_t deserialize_mip_result_response( + const std::vector& data) override + { + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size())) { + return mip_solution_t( + cuopt::logic_error("Failed to parse result response", cuopt::error_type_t::RuntimeError)); + } + + if (!response.has_result_response()) { + return mip_solution_t( + cuopt::logic_error("Response is not a result response", cuopt::error_type_t::RuntimeError)); + } + + const auto& result = response.result_response(); + if (result.status() != cuopt::remote::SUCCESS) { + return mip_solution_t( + cuopt::logic_error(result.error_message(), cuopt::error_type_t::RuntimeError)); + } + + if (!result.has_mip_solution()) { + return mip_solution_t(cuopt::logic_error("Response does not contain MIP solution", + cuopt::error_type_t::RuntimeError)); + } + + return proto_to_mip_solution(result.mip_solution()); + } + + //============================================================================ + // Metadata + //============================================================================ + + std::string format_name() const override { return "protobuf"; } + + uint32_t protocol_version() const override { return 1; } + + private: + 
//============================================================================ + // Helper Methods - Problem Serialization + //============================================================================ + + void serialize_problem_to_proto(const mps_parser::data_model_view_t& view, + cuopt::remote::OptimizationProblem* pb_problem) + { + // Note: view must point to CPU memory for serialization + // The solve functions ensure this by copying GPU data to CPU if needed + + pb_problem->set_problem_name(view.get_problem_name()); + pb_problem->set_objective_name(view.get_objective_name()); + pb_problem->set_maximize(view.get_sense()); // get_sense() returns true for maximize + pb_problem->set_objective_scaling_factor(view.get_objective_scaling_factor()); + pb_problem->set_objective_offset(view.get_objective_offset()); + + // Get spans for constraint matrix (CSR format) + auto values_span = view.get_constraint_matrix_values(); + auto indices_span = view.get_constraint_matrix_indices(); + auto offsets_span = view.get_constraint_matrix_offsets(); + + auto nnz = static_cast(values_span.size()); + auto n_rows = static_cast(offsets_span.size()) - 1; + + const f_t* values_ptr = values_span.data(); + const i_t* indices_ptr = indices_span.data(); + const i_t* offsets_ptr = offsets_span.data(); + + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_constraint_matrix_values(static_cast(values_ptr[i])); + } + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_constraint_matrix_indices(static_cast(indices_ptr[i])); + } + for (i_t i = 0; i <= n_rows; ++i) { + pb_problem->add_constraint_matrix_offsets(static_cast(offsets_ptr[i])); + } + + // Objective coefficients + auto obj_span = view.get_objective_coefficients(); + auto n_cols = static_cast(obj_span.size()); + const f_t* obj_ptr = obj_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + pb_problem->add_objective_coefficients(static_cast(obj_ptr[i])); + } + + // Variable bounds + auto lb_span = view.get_variable_lower_bounds(); + auto 
ub_span = view.get_variable_upper_bounds(); + const f_t* lb_ptr = lb_span.data(); + const f_t* ub_ptr = ub_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + pb_problem->add_variable_lower_bounds(static_cast(lb_ptr[i])); + pb_problem->add_variable_upper_bounds(static_cast(ub_ptr[i])); + } + + // Constraint bounds + auto c_lb_span = view.get_constraint_lower_bounds(); + auto c_ub_span = view.get_constraint_upper_bounds(); + const f_t* c_lb_ptr = c_lb_span.data(); + const f_t* c_ub_ptr = c_ub_span.data(); + for (i_t i = 0; i < n_rows; ++i) { + pb_problem->add_constraint_lower_bounds(static_cast(c_lb_ptr[i])); + pb_problem->add_constraint_upper_bounds(static_cast(c_ub_ptr[i])); + } + + // Variable names (if available) + const auto& var_names = view.get_variable_names(); + for (const auto& name : var_names) { + pb_problem->add_variable_names(name); + } + + // Row names (if available) + const auto& row_names = view.get_row_names(); + for (const auto& name : row_names) { + pb_problem->add_row_names(name); + } + } + + void serialize_lp_settings_to_proto(const pdlp_solver_settings_t& settings, + cuopt::remote::PDLPSolverSettings* pb_settings) + { + // Map from cuOpt tolerances to protobuf settings + pb_settings->set_eps_optimal_absolute(settings.tolerances.absolute_gap_tolerance); + pb_settings->set_eps_optimal_relative(settings.tolerances.relative_gap_tolerance); + pb_settings->set_eps_primal_infeasible(settings.tolerances.primal_infeasible_tolerance); + pb_settings->set_eps_dual_infeasible(settings.tolerances.dual_infeasible_tolerance); + + // Handle infinity time_limit: use -1 to signal "no limit" in the protobuf + // This avoids undefined behavior when casting infinity to int32_t + int32_t time_limit_sec = -1; // -1 means no limit + if (std::isfinite(settings.time_limit) && settings.time_limit > 0) { + time_limit_sec = static_cast( + std::min(settings.time_limit, static_cast(std::numeric_limits::max()))); + } + pb_settings->set_time_sec_limit(time_limit_sec); + + // 
Handle max iteration limit similarly + int32_t iter_limit = -1; // -1 means no limit + if (settings.iteration_limit > 0 && + settings.iteration_limit < std::numeric_limits::max()) { + iter_limit = + static_cast(std::min(static_cast(settings.iteration_limit), + static_cast(std::numeric_limits::max()))); + } + pb_settings->set_iteration_limit(iter_limit); + + // initial_primal_weight and initial_step_size are not directly accessible, use defaults + pb_settings->set_initial_primal_weight(1.0); + pb_settings->set_initial_step_size(1.0); + pb_settings->set_verbosity(settings.log_to_console ? 1 : 0); + } + + //============================================================================ + // Helper Methods - Problem Deserialization + //============================================================================ + + void proto_to_mps_data(const cuopt::remote::OptimizationProblem& pb_problem, + cuopt::mps_parser::mps_data_model_t& mps_data) + { + mps_data.set_problem_name(pb_problem.problem_name()); + mps_data.set_objective_name(pb_problem.objective_name()); + mps_data.set_maximize(pb_problem.maximize()); + mps_data.set_objective_scaling_factor(pb_problem.objective_scaling_factor()); + mps_data.set_objective_offset(pb_problem.objective_offset()); + + // Copy constraint matrix - mps_data_model_t copies the data + std::vector values(pb_problem.constraint_matrix_values().begin(), + pb_problem.constraint_matrix_values().end()); + std::vector indices(pb_problem.constraint_matrix_indices().begin(), + pb_problem.constraint_matrix_indices().end()); + std::vector offsets(pb_problem.constraint_matrix_offsets().begin(), + pb_problem.constraint_matrix_offsets().end()); + + mps_data.set_csr_constraint_matrix(values.data(), + static_cast(values.size()), + indices.data(), + static_cast(indices.size()), + offsets.data(), + static_cast(offsets.size())); + + // Objective coefficients + std::vector obj(pb_problem.objective_coefficients().begin(), + pb_problem.objective_coefficients().end()); 
+ mps_data.set_objective_coefficients(obj.data(), static_cast(obj.size())); + + // Variable bounds + std::vector var_lb(pb_problem.variable_lower_bounds().begin(), + pb_problem.variable_lower_bounds().end()); + std::vector var_ub(pb_problem.variable_upper_bounds().begin(), + pb_problem.variable_upper_bounds().end()); + mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); + mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); + + // Constraint bounds + std::vector con_lb(pb_problem.constraint_lower_bounds().begin(), + pb_problem.constraint_lower_bounds().end()); + std::vector con_ub(pb_problem.constraint_upper_bounds().begin(), + pb_problem.constraint_upper_bounds().end()); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + + // Variable names + if (pb_problem.variable_names_size() > 0) { + std::vector var_names(pb_problem.variable_names().begin(), + pb_problem.variable_names().end()); + mps_data.set_variable_names(var_names); + } + + // Row names + if (pb_problem.row_names_size() > 0) { + std::vector row_names(pb_problem.row_names().begin(), + pb_problem.row_names().end()); + mps_data.set_row_names(row_names); + } + } + + void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, + pdlp_solver_settings_t& settings) + { + // Map from protobuf settings to cuOpt tolerances + settings.tolerances.absolute_gap_tolerance = pb_settings.eps_optimal_absolute(); + settings.tolerances.relative_gap_tolerance = pb_settings.eps_optimal_relative(); + settings.tolerances.primal_infeasible_tolerance = pb_settings.eps_primal_infeasible(); + settings.tolerances.dual_infeasible_tolerance = pb_settings.eps_dual_infeasible(); + + // Handle time limit: -1 or 0 means no limit (infinity) + int32_t time_limit_sec = pb_settings.time_sec_limit(); + if (time_limit_sec <= 0) { + settings.time_limit = 
std::numeric_limits::infinity(); + } else { + settings.time_limit = static_cast(time_limit_sec); + } + + // Handle iteration limit: -1 or 0 means no limit + int32_t iter_limit = pb_settings.iteration_limit(); + if (iter_limit <= 0) { + settings.iteration_limit = std::numeric_limits::max(); + } else { + settings.iteration_limit = static_cast(iter_limit); + } + + settings.log_to_console = pb_settings.verbosity() > 0; + } + + void proto_to_mip_settings(const cuopt::remote::MIPSolverSettings& pb_settings, + mip_solver_settings_t& settings) + { + settings.time_limit = pb_settings.time_limit(); + settings.tolerances.relative_mip_gap = pb_settings.mip_gap(); + // Note: verbosity not directly supported in mip_solver_settings_t + } + + //============================================================================ + // Helper Methods - Solution Conversion + //============================================================================ + + optimization_problem_solution_t proto_to_lp_solution( + const cuopt::remote::LPSolution& pb_solution) + { + // Create CPU-based solution + std::vector primal(pb_solution.primal_solution().begin(), + pb_solution.primal_solution().end()); + std::vector dual(pb_solution.dual_solution().begin(), pb_solution.dual_solution().end()); + std::vector reduced_cost(pb_solution.reduced_cost().begin(), + pb_solution.reduced_cost().end()); + + optimization_problem_solution_t solution( + from_proto_status(pb_solution.termination_status())); + + // Set solution data + solution.set_primal_solution_host(std::move(primal)); + solution.set_dual_solution_host(std::move(dual)); + solution.set_reduced_cost_host(std::move(reduced_cost)); + + // Set statistics + solution.set_l2_primal_residual(pb_solution.l2_primal_residual()); + solution.set_l2_dual_residual(pb_solution.l2_dual_residual()); + solution.set_primal_objective(pb_solution.primal_objective()); + solution.set_dual_objective(pb_solution.dual_objective()); + solution.set_gap(pb_solution.gap()); + 
solution.set_nb_iterations(pb_solution.nb_iterations()); + solution.set_solve_time(pb_solution.solve_time()); + solution.set_solved_by_pdlp(pb_solution.solved_by_pdlp()); + + return solution; + } + + void lp_solution_to_proto(const optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Solution vectors - handle both device and host memory + if (solution.is_device_memory()) { + // Copy from device to host + const auto& d_primal = solution.get_primal_solution(); + const auto& d_dual = solution.get_dual_solution(); + // Note: reduced_cost getter is non-const, so we need to work around this + + // Copy primal solution from device + if (d_primal.size() > 0) { + std::vector h_primal(d_primal.size()); + cudaMemcpy( + h_primal.data(), d_primal.data(), d_primal.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + } + + // Copy dual solution from device + if (d_dual.size() > 0) { + std::vector h_dual(d_dual.size()); + cudaMemcpy( + h_dual.data(), d_dual.data(), d_dual.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + } + + // For reduced cost, we can access via const cast since we're just reading + auto& nc_solution = const_cast&>(solution); + auto& d_reduced_cost = nc_solution.get_reduced_cost(); + if (d_reduced_cost.size() > 0) { + std::vector h_reduced_cost(d_reduced_cost.size()); + cudaMemcpy(h_reduced_cost.data(), + d_reduced_cost.data(), + d_reduced_cost.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + for (const auto& v : h_reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + } else { + // Data is already on host + const auto& primal = solution.get_primal_solution_host(); + const 
auto& dual = solution.get_dual_solution_host(); + const auto& reduced_cost = solution.get_reduced_cost_host(); + + for (const auto& v : primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + for (const auto& v : dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + for (const auto& v : reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + + // Statistics + pb_solution->set_l2_primal_residual(solution.get_l2_primal_residual()); + pb_solution->set_l2_dual_residual(solution.get_l2_dual_residual()); + pb_solution->set_primal_objective(solution.get_primal_objective()); + pb_solution->set_dual_objective(solution.get_dual_objective()); + pb_solution->set_gap(solution.get_gap()); + pb_solution->set_nb_iterations(solution.get_nb_iterations()); + pb_solution->set_solve_time(solution.get_solve_time()); + pb_solution->set_solved_by_pdlp(solution.get_solved_by_pdlp()); + } + + mip_solution_t proto_to_mip_solution(const cuopt::remote::MIPSolution& pb_solution) + { + std::vector solution_vec(pb_solution.solution().begin(), pb_solution.solution().end()); + + // Create stats from protobuf data + solver_stats_t stats; + stats.total_solve_time = pb_solution.total_solve_time(); + stats.presolve_time = pb_solution.presolve_time(); + stats.solution_bound = pb_solution.solution_bound(); + stats.num_nodes = pb_solution.nodes(); + stats.num_simplex_iterations = pb_solution.simplex_iterations(); + + mip_solution_t solution(from_proto_mip_status(pb_solution.termination_status()), + stats); + + solution.set_solution_host(std::move(solution_vec)); + solution.set_objective(pb_solution.objective()); + solution.set_mip_gap(pb_solution.mip_gap()); + solution.set_max_constraint_violation(pb_solution.max_constraint_violation()); + solution.set_max_int_violation(pb_solution.max_int_violation()); + solution.set_max_variable_bound_violation(pb_solution.max_variable_bound_violation()); + + return solution; + } + + void mip_solution_to_proto(const 
mip_solution_t& solution, + cuopt::remote::MIPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_mip_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Handle both device and host memory + if (solution.is_device_memory()) { + const auto& d_sol = solution.get_solution(); + if (d_sol.size() > 0) { + std::vector h_sol(d_sol.size()); + cudaMemcpy(h_sol.data(), d_sol.data(), d_sol.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_sol) { + pb_solution->add_solution(static_cast(v)); + } + } + } else { + const auto& sol_vec = solution.get_solution_host(); + for (const auto& v : sol_vec) { + pb_solution->add_solution(static_cast(v)); + } + } + + pb_solution->set_objective(solution.get_objective_value()); + pb_solution->set_mip_gap(solution.get_mip_gap()); + pb_solution->set_solution_bound(solution.get_solution_bound()); + pb_solution->set_total_solve_time(solution.get_total_solve_time()); + pb_solution->set_presolve_time(solution.get_presolve_time()); + pb_solution->set_max_constraint_violation(solution.get_max_constraint_violation()); + pb_solution->set_max_int_violation(solution.get_max_int_violation()); + pb_solution->set_max_variable_bound_violation(solution.get_max_variable_bound_violation()); + pb_solution->set_nodes(solution.get_num_nodes()); + pb_solution->set_simplex_iterations(solution.get_num_simplex_iterations()); + } +}; + +//============================================================================ +// Template Instantiations +// Note: Only int32_t and double types are instantiated to avoid adding +// int64_t instantiations throughout the codebase +//============================================================================ + +#if CUOPT_INSTANTIATE_FLOAT +template class protobuf_serializer_t; +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template class protobuf_serializer_t; +#endif + 
+//============================================================================ +// Factory Functions +//============================================================================ + +template +std::shared_ptr> get_default_serializer() +{ + static auto instance = std::make_shared>(); + return instance; +} + +// Explicit instantiations for factory functions +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_default_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_default_serializer(); +#endif + +// Custom serializer loader (lazy-initialized) +template +std::shared_ptr> get_serializer() +{ + static std::shared_ptr> instance; + static std::once_flag init_flag; + + std::call_once(init_flag, []() { + const char* custom_lib = std::getenv("CUOPT_SERIALIZER_LIB"); + + if (custom_lib && custom_lib[0] != '\0') { + // Try to load custom serializer + CUOPT_LOG_INFO("[remote_solve] Loading custom serializer from: {}", custom_lib); + + // Dynamic loading would go here + // For now, fall back to default + CUOPT_LOG_WARN("[remote_solve] Custom serializer loading not yet implemented, using default"); + instance = get_default_serializer(); + } else { + instance = get_default_serializer(); + } + }); + + return instance; +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_serializer(); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu new file mode 100644 index 000000000..d4e8df0b8 --- /dev/null +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -0,0 +1,360 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// Message types for streaming protocol (must match server) +enum class MessageType : uint8_t { + LOG_MESSAGE = 0, // Log output from server + SOLUTION = 1, // Final solution data +}; + +/** + * @brief Simple socket client for remote solve with streaming support + */ +class remote_client_t { + public: + remote_client_t(const std::string& host, int port) : host_(host), port_(port), sockfd_(-1) {} + + ~remote_client_t() { disconnect(); } + + bool connect() + { + if (sockfd_ >= 0) return true; + + sockfd_ = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd_ < 0) { + CUOPT_LOG_ERROR("[remote_solve] Failed to create socket"); + return false; + } + + struct hostent* server = gethostbyname(host_.c_str()); + if (server == nullptr) { + CUOPT_LOG_ERROR("[remote_solve] Unknown host: {}", host_); + close(sockfd_); + sockfd_ = -1; + return false; + } + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length); + addr.sin_port = htons(port_); + + if (::connect(sockfd_, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + CUOPT_LOG_ERROR("[remote_solve] Failed to connect to {}:{}", host_, port_); + close(sockfd_); + sockfd_ = -1; + return false; + } + + CUOPT_LOG_INFO("[remote_solve] Connected to {}:{}", host_, port_); + return true; + } + + void disconnect() + { + if (sockfd_ >= 0) { + close(sockfd_); + sockfd_ = -1; + } + } + + bool send_request(const std::vector& data) + { + if (sockfd_ < 0) return false; + + // Send size first (4 bytes, network byte order) + uint32_t size = static_cast(data.size()); + if (!write_all(&size, sizeof(size))) return false; + if (!write_all(data.data(), data.size())) return false; + return true; + } + + /** + * @brief Receive response with 
streaming log support. + * + * This method reads messages from the server. If the server sends LOG_MESSAGE + * types, they are printed to the console. When a SOLUTION message is received, + * the solution data is returned. + * + * @param data Output buffer for solution data + * @param log_to_console If true, print received log messages + * @return true if solution was received, false on error + */ + bool receive_streaming_response(std::vector& data, bool log_to_console) + { + if (sockfd_ < 0) return false; + + while (true) { + // Read message type (1 byte) + uint8_t msg_type; + if (!read_all(&msg_type, 1)) { + // If we can't read the message type, try legacy format + // (server might not support streaming) + return receive_response_legacy(data); + } + + // Read payload size (4 bytes) + uint32_t payload_size; + if (!read_all(&payload_size, sizeof(payload_size))) return false; + + // Sanity check + if (payload_size > 100 * 1024 * 1024) { + CUOPT_LOG_ERROR("[remote_solve] Message too large: {} bytes", payload_size); + return false; + } + + if (static_cast(msg_type) == MessageType::LOG_MESSAGE) { + // Read and display log message + if (payload_size > 0) { + std::vector log_msg(payload_size + 1); + if (!read_all(log_msg.data(), payload_size)) return false; + log_msg[payload_size] = '\0'; + + if (log_to_console) { + // Print log message from server (already formatted) + std::cout << log_msg.data() << std::flush; + } + } + } else if (static_cast(msg_type) == MessageType::SOLUTION) { + // Read solution data + data.resize(payload_size); + if (payload_size > 0) { + if (!read_all(data.data(), payload_size)) return false; + } + return true; + } else { + CUOPT_LOG_WARN("[remote_solve] Unknown message type: {}", static_cast(msg_type)); + // Skip unknown message + if (payload_size > 0) { + std::vector skip_buf(payload_size); + if (!read_all(skip_buf.data(), payload_size)) return false; + } + } + } + } + + // Legacy response format (non-streaming) + bool 
receive_response(std::vector& data) { return receive_response_legacy(data); } + + private: + bool receive_response_legacy(std::vector& data) + { + if (sockfd_ < 0) return false; + + // Read size first + uint32_t size; + if (!read_all(&size, sizeof(size))) return false; + + // Sanity check + if (size > 100 * 1024 * 1024) { + CUOPT_LOG_ERROR("[remote_solve] Response too large: {} bytes", size); + return false; + } + + data.resize(size); + if (!read_all(data.data(), size)) return false; + return true; + } + + bool write_all(const void* buf, size_t len) + { + const uint8_t* ptr = static_cast(buf); + size_t remaining = len; + while (remaining > 0) { + ssize_t n = ::write(sockfd_, ptr, remaining); + if (n <= 0) { + CUOPT_LOG_ERROR("[remote_solve] Write failed"); + return false; + } + ptr += n; + remaining -= n; + } + return true; + } + + bool read_all(void* buf, size_t len) + { + uint8_t* ptr = static_cast(buf); + size_t remaining = len; + while (remaining > 0) { + ssize_t n = ::read(sockfd_, ptr, remaining); + if (n <= 0) { + CUOPT_LOG_ERROR("[remote_solve] Read failed"); + return false; + } + ptr += n; + remaining -= n; + } + return true; + } + + std::string host_; + int port_; + int sockfd_; +}; + +} // namespace + +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) +{ + CUOPT_LOG_INFO("[remote_solve] Solving LP remotely on {}:{}", config.host, config.port); + + // Log problem info (similar to local solve) + if (settings.log_to_console) { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? 
static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO("Solving a problem with %d constraints, %d variables, and %d nonzeros (remote)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + + // Serialize the request + std::vector request_data = serializer->serialize_lp_request(view, settings); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request: {} bytes", request_data.size()); + + // Connect and send + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return optimization_problem_solution_t( + cuopt::logic_error("Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); + } + + if (!client.send_request(request_data)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to send request to remote server", cuopt::error_type_t::RuntimeError)); + } + + // Receive response with streaming log support + // Server sends LOG_MESSAGE types during solve, then SOLUTION at end + std::vector response_data; + if (!client.receive_streaming_response(response_data, settings.log_to_console)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); + } + + CUOPT_LOG_DEBUG("[remote_solve] Received LP solution: {} bytes", response_data.size()); + + // Deserialize solution + auto solution = serializer->deserialize_lp_solution(response_data); + + // Note: Detailed logs were already streamed from server, no need to duplicate summary here + + return solution; +} + +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) +{ + CUOPT_LOG_INFO("[remote_solve] Solving MIP remotely on {}:{}", config.host, config.port); + + // Log 
problem info (similar to local solve) + // Note: MIP settings don't have log_to_console, so we always log + { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO( + "Solving a problem with %d constraints, %d variables, and %d nonzeros (remote MIP)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + + // Serialize the request + std::vector request_data = serializer->serialize_mip_request(view, settings); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request: {} bytes", request_data.size()); + + // Connect and send + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return mip_solution_t( + cuopt::logic_error("Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); + } + + if (!client.send_request(request_data)) { + return mip_solution_t(cuopt::logic_error("Failed to send request to remote server", + cuopt::error_type_t::RuntimeError)); + } + + // Receive response with streaming log support + std::vector response_data; + if (!client.receive_streaming_response(response_data, true /* log_to_console */)) { + return mip_solution_t(cuopt::logic_error( + "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); + } + + CUOPT_LOG_DEBUG("[remote_solve] Received MIP solution: {} bytes", response_data.size()); + + // Deserialize solution + auto solution = serializer->deserialize_mip_solution(response_data); + + // Note: Detailed logs were already streamed from server, no need to duplicate summary here + + return solution; +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const 
pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 0fc8eff02..43f5a0cab 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -318,6 +318,95 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } +//============================================================================ +// Setters for remote solve deserialization +//============================================================================ + +template +void mip_solution_t::set_solution_host(std::vector solution) +{ + solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void mip_solution_t::set_objective(f_t value) +{ + objective_ = value; +} + +template +void mip_solution_t::set_mip_gap(f_t value) +{ + mip_gap_ = value; +} + +template +void mip_solution_t::set_solution_bound(f_t value) +{ + stats_.solution_bound = value; +} + +template +void mip_solution_t::set_total_solve_time(double value) +{ + stats_.total_solve_time = value; +} + +template +void mip_solution_t::set_presolve_time(double value) +{ + stats_.presolve_time = value; +} + +template +void mip_solution_t::set_max_constraint_violation(f_t value) +{ + max_constraint_violation_ = value; +} + +template +void 
mip_solution_t::set_max_int_violation(f_t value) +{ + max_int_violation_ = value; +} + +template +void mip_solution_t::set_max_variable_bound_violation(f_t value) +{ + max_variable_bound_violation_ = value; +} + +template +void mip_solution_t::set_nodes(i_t value) +{ + stats_.num_nodes = value; +} + +template +void mip_solution_t::set_simplex_iterations(i_t value) +{ + stats_.num_simplex_iterations = value; +} + +template +std::string mip_solution_t::get_error_string() const +{ + return error_status_.what(); +} + +template +i_t mip_solution_t::get_nodes() const +{ + return stats_.num_nodes; +} + +template +i_t mip_solution_t::get_simplex_iterations() const +{ + return stats_.num_simplex_iterations; +} + #if MIP_INSTANTIATE_FLOAT template class mip_solution_t; #endif From 8e360e98379b095b35ebb65a3f95c621f77dc033 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 6 Jan 2026 11:44:08 -0500 Subject: [PATCH 10/37] Fix protobuf serializer to handle row_types constraint format When users set constraints using set_row_types() + set_constraint_bounds() instead of set_constraint_lower/upper_bounds(), the serializer now computes the constraint lower/upper bounds from the row types: - 'E' (equality): lb = ub = rhs - 'L' (less-than): lb = -inf, ub = rhs - 'G' (greater-than): lb = rhs, ub = inf - 'N' (free): lb = -inf, ub = inf Also fixed server to always use streaming protocol for solutions (even when --no-stream is set, only log streaming is disabled). 
--- cpp/cuopt_remote_server.cpp | 15 ++--- .../utilities/protobuf_serializer.cu | 62 ++++++++++++++++--- 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 86e30a42b..4b5f8193f 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -485,17 +485,10 @@ void handle_client(int client_fd, bool stream_logs) std::cout << "[Server] Completed job: " << job_id << " (success: " << success << ")\n"; } - // Send response - if (stream_logs) { - // Use streaming protocol - if (!send_solution_message(client_fd, result_data)) { - std::cerr << "[Server] Failed to send solution message\n"; - } - } else { - // Use legacy protocol - if (!send_response(client_fd, result_data)) { - std::cerr << "[Server] Failed to send response\n"; - } + // Send response - always use streaming protocol for consistency + // (stream_logs only controls whether LOG_MESSAGE are sent during solve) + if (!send_solution_message(client_fd, result_data)) { + std::cerr << "[Server] Failed to send solution message\n"; } close(client_fd); diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 3192c894b..62464bd79 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -525,14 +525,60 @@ class protobuf_serializer_t : public remote_serializer_t { pb_problem->add_variable_upper_bounds(static_cast(ub_ptr[i])); } - // Constraint bounds - auto c_lb_span = view.get_constraint_lower_bounds(); - auto c_ub_span = view.get_constraint_upper_bounds(); - const f_t* c_lb_ptr = c_lb_span.data(); - const f_t* c_ub_ptr = c_ub_span.data(); - for (i_t i = 0; i < n_rows; ++i) { - pb_problem->add_constraint_lower_bounds(static_cast(c_lb_ptr[i])); - pb_problem->add_constraint_upper_bounds(static_cast(c_ub_ptr[i])); + // Constraint bounds - need to handle both formats: + // 
1. Direct lower/upper bounds (set_constraint_lower/upper_bounds) + // 2. RHS + row_types format (set_constraint_bounds + set_row_types) + auto c_lb_span = view.get_constraint_lower_bounds(); + auto c_ub_span = view.get_constraint_upper_bounds(); + + if (c_lb_span.size() == static_cast(n_rows) && + c_ub_span.size() == static_cast(n_rows)) { + // Direct format - use as-is + const f_t* c_lb_ptr = c_lb_span.data(); + const f_t* c_ub_ptr = c_ub_span.data(); + for (i_t i = 0; i < n_rows; ++i) { + pb_problem->add_constraint_lower_bounds(static_cast(c_lb_ptr[i])); + pb_problem->add_constraint_upper_bounds(static_cast(c_ub_ptr[i])); + } + } else { + // RHS + row_types format - compute lower/upper bounds + auto b_span = view.get_constraint_bounds(); + auto row_types_span = view.get_row_types(); + const f_t* b_ptr = b_span.data(); + const char* rt_ptr = row_types_span.data(); + + constexpr f_t inf = std::numeric_limits::infinity(); + + for (i_t i = 0; i < n_rows; ++i) { + f_t lb, ub; + char row_type = (rt_ptr && row_types_span.size() > 0) ? rt_ptr[i] : 'E'; + f_t rhs = (b_ptr && b_span.size() > 0) ? 
b_ptr[i] : 0; + + switch (row_type) { + case 'E': // Equality: lb = ub = rhs + lb = rhs; + ub = rhs; + break; + case 'L': // Less-than-or-equal: -inf <= Ax <= rhs + lb = -inf; + ub = rhs; + break; + case 'G': // Greater-than-or-equal: rhs <= Ax <= inf + lb = rhs; + ub = inf; + break; + case 'N': // Non-constraining (free) + lb = -inf; + ub = inf; + break; + default: // Default to equality + lb = rhs; + ub = rhs; + break; + } + pb_problem->add_constraint_lower_bounds(static_cast(lb)); + pb_problem->add_constraint_upper_bounds(static_cast(ub)); + } } // Variable names (if available) From f048fe97c268b2c13122ee8f5bc3a7ebf93e60fa Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 6 Jan 2026 11:59:35 -0500 Subject: [PATCH 11/37] Enable MIP remote solve with variable type serialization - Integrate solve_mip_remote() into solve_mip() dispatch - Serialize variable types (integer/binary) to protobuf is_integer/is_binary fields - Deserialize variable types on server side - Fix is_mip_request() to check header.problem_type for reliable detection MIP remote solve now correctly handles integer and binary variables. 
--- .../utilities/protobuf_serializer.cu | 47 ++++++++++++++++--- cpp/src/mip/solve.cu | 16 +++---- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 62464bd79..c3df71883 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -184,16 +184,22 @@ class protobuf_serializer_t : public remote_serializer_t { // Try to parse as async request first cuopt::remote::AsyncRequest async_request; if (async_request.ParseFromArray(data.data(), data.size())) { - return async_request.has_mip_request(); + if (async_request.has_mip_request()) { return true; } + if (async_request.has_lp_request()) { return false; } } - // Try LP request - cuopt::remote::SolveLPRequest lp_request; - if (lp_request.ParseFromArray(data.data(), data.size())) { return false; } - - // Try MIP request + // Try to parse as direct request and check the header's problem_type + // MIP request - check if header indicates MIP cuopt::remote::SolveMIPRequest mip_request; - if (mip_request.ParseFromArray(data.data(), data.size())) { return true; } + if (mip_request.ParseFromArray(data.data(), data.size()) && mip_request.has_header()) { + if (mip_request.header().problem_type() == cuopt::remote::MIP) { return true; } + } + + // LP request - check if header indicates LP + cuopt::remote::SolveLPRequest lp_request; + if (lp_request.ParseFromArray(data.data(), data.size()) && lp_request.has_header()) { + if (lp_request.header().problem_type() == cuopt::remote::LP) { return false; } + } return false; // Default to LP if can't determine } @@ -592,6 +598,18 @@ class protobuf_serializer_t : public remote_serializer_t { for (const auto& name : row_names) { pb_problem->add_row_names(name); } + + // Variable types (for MIP problems) + auto var_types_span = view.get_variable_types(); + if 
(var_types_span.size() > 0) { + const char* var_types_ptr = var_types_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + char vtype = var_types_ptr[i]; + // 'I' = integer, 'B' = binary, 'C' = continuous (default) + pb_problem->add_is_integer(vtype == 'I' || vtype == 'B'); + pb_problem->add_is_binary(vtype == 'B'); + } + } } void serialize_lp_settings_to_proto(const pdlp_solver_settings_t& settings, @@ -690,6 +708,21 @@ class protobuf_serializer_t : public remote_serializer_t { pb_problem.row_names().end()); mps_data.set_row_names(row_names); } + + // Variable types (for MIP problems) + // is_integer and is_binary arrays indicate variable types + if (pb_problem.is_integer_size() > 0) { + i_t n_vars = static_cast(pb_problem.is_integer_size()); + std::vector var_types(n_vars, 'C'); // Default to continuous + for (i_t i = 0; i < n_vars; ++i) { + if (pb_problem.is_binary_size() > i && pb_problem.is_binary(i)) { + var_types[i] = 'B'; // Binary + } else if (pb_problem.is_integer(i)) { + var_types[i] = 'I'; // Integer + } + } + mps_data.set_variable_types(var_types); + } } void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index d5d9238c1..1886a4418 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -31,6 +31,8 @@ #include +#include + #include #include #include @@ -538,11 +540,8 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, "[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), remote_config->port); - // TODO: Implement remote solve - serialize cpu_view and send to remote server - CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); - // Use CPU-only constructor - no stream needed for remote solve error - return mip_solution_t( - cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); + // Remote solve with GPU data - serialize cpu_view and send to remote 
server + return solve_mip_remote(*remote_config, cpu_view, settings); } // Local solve: data already on GPU - convert view to optimization_problem_t and solve @@ -555,11 +554,8 @@ mip_solution_t solve_mip(raft::handle_t const* handle_ptr, CUOPT_LOG_INFO("[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", remote_config->host.c_str(), remote_config->port); - // TODO: Implement remote solve - serialize view (CPU memory) and send to remote server - CUOPT_LOG_ERROR("[solve_mip] Remote solve not yet implemented"); - // Use CPU-only constructor - no stream/handle needed for remote solve - return mip_solution_t( - cuopt::logic_error("Remote solve not yet implemented", cuopt::error_type_t::RuntimeError)); + // Remote solve with CPU data - serialize view and send to remote server + return solve_mip_remote(*remote_config, view, settings); } // Local solve with CPU data: copy to GPU and solve From 6c094d2395c4c6e014bf0426627ed15fdf8a7131 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 6 Jan 2026 13:55:22 -0500 Subject: [PATCH 12/37] Add pluggable serialization and to_host() for remote solve - Add to_host() method to LP/MIP solution classes for GPU->CPU copy - Server calls to_host() before serialization, simplifying serializer impl - Add dynamic serializer loading via CUOPT_SERIALIZER_LIB env var - Add MsgPack serializer plugin as proof-of-concept - Add serialization plugin development guide --- cpp/CMakeLists.txt | 5 + cpp/cuopt_remote_server.cpp | 13 + .../mip/solver_solution.hpp | 14 + .../pdlp/solver_solution.hpp | 14 + cpp/src/linear_programming/solver_solution.cu | 51 ++ .../utilities/protobuf_serializer.cu | 48 +- .../utilities/serializers/CMakeLists.txt | 55 ++ .../serializers/msgpack_serializer.cpp | 594 +++++++++++++++++ cpp/src/mip/solver_solution.cu | 26 + docs/developer/SERIALIZATION_PLUGIN_GUIDE.md | 595 ++++++++++++++++++ 10 files changed, 1411 insertions(+), 4 deletions(-) create mode 100644 
cpp/src/linear_programming/utilities/serializers/CMakeLists.txt create mode 100644 cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp create mode 100644 docs/developer/SERIALIZATION_PLUGIN_GUIDE.md diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 15be59b94..67468534f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -626,6 +626,11 @@ if(BUILD_REMOTE_SERVER AND NOT BUILD_LP_ONLY) target_link_options(local_solve_test PRIVATE -Wl,--enable-new-dtags) endif() +# ################################################################################################## +# - Pluggable Serializers -------------------------------------------------------------------------- +# Build optional serializer plugins (e.g., msgpack) +add_subdirectory(src/linear_programming/utilities/serializers) + # ################################################################################################## # - CPack has to be the last item in the cmake file------------------------------------------------- # Used to create an installable deb package for cuOpt diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 4b5f8193f..63fbe7680 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -337,6 +337,10 @@ void worker_thread(int worker_id) // Solve using the data model directly auto solution = solve_mip(&handle, mps_data, settings); + // Move solution to CPU memory for serialization + // (remote solve clients are typically CPU-only hosts) + solution.to_host(handle.get_stream()); + // Serialize result result_data = serializer->serialize_mip_solution(solution); success = true; @@ -375,6 +379,10 @@ void worker_thread(int worker_id) // Solve using the data model directly auto solution = solve_lp(&handle, mps_data, settings); + // Move solution to CPU memory for serialization + // (remote solve clients are typically CPU-only hosts) + solution.to_host(handle.get_stream()); + // Serialize result result_data = 
serializer->serialize_lp_solution(solution); success = true; @@ -487,8 +495,13 @@ void handle_client(int client_fd, bool stream_logs) // Send response - always use streaming protocol for consistency // (stream_logs only controls whether LOG_MESSAGE are sent during solve) + std::cout << "[Server] Sending solution message, size = " << result_data.size() << " bytes\n"; + std::cout.flush(); if (!send_solution_message(client_fd, result_data)) { std::cerr << "[Server] Failed to send solution message\n"; + } else { + std::cout << "[Server] Solution message sent successfully\n"; + std::cout.flush(); } close(client_fd); diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index 1315f5ec2..4d896cf7d 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -179,6 +179,20 @@ class mip_solution_t : public base_solution_t { */ i_t get_simplex_iterations() const; + /** + * @brief Copy solution data from GPU to CPU memory. + * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_solution_host(). + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. 
+ * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: // GPU (device) storage - populated for local GPU solves std::unique_ptr> solution_; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index d975ea11e..9d7e34831 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -475,6 +475,20 @@ class optimization_problem_solution_t : public base_solution_t { void copy_from(const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other); + /** + * @brief Copy solution data from GPU to CPU memory. + * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_primal_solution_host(), etc. + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. 
+ * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: void write_additional_termination_statistics_to_file(std::ofstream& myfile); diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index 79dfdb774..45ed3dcd3 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -659,6 +659,57 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } +template +void optimization_problem_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (!is_device_memory_) { + // Already on CPU, nothing to do + return; + } + + // Initialize host storage if needed + if (!primal_solution_host_) { primal_solution_host_ = std::make_unique>(); } + if (!dual_solution_host_) { dual_solution_host_ = std::make_unique>(); } + if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique>(); } + + // Copy primal solution + if (primal_solution_ && primal_solution_->size() > 0) { + primal_solution_host_->resize(primal_solution_->size()); + raft::copy(primal_solution_host_->data(), + primal_solution_->data(), + primal_solution_->size(), + stream_view.value()); + } + + // Copy dual solution + if (dual_solution_ && dual_solution_->size() > 0) { + dual_solution_host_->resize(dual_solution_->size()); + raft::copy(dual_solution_host_->data(), + dual_solution_->data(), + dual_solution_->size(), + stream_view.value()); + } + + // Copy reduced cost + if (reduced_cost_ && reduced_cost_->size() > 0) { + reduced_cost_host_->resize(reduced_cost_->size()); + raft::copy(reduced_cost_host_->data(), + reduced_cost_->data(), + reduced_cost_->size(), + stream_view.value()); + } + + // Synchronize to ensure copies are complete + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + // Clear GPU storage to free memory + primal_solution_.reset(); 
+ dual_solution_.reset(); + reduced_cost_.reset(); + + is_device_memory_ = false; +} + #if MIP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index c3df71883..9a81762be 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -9,6 +9,7 @@ #include +#include #include #include @@ -979,10 +980,49 @@ std::shared_ptr> get_serializer() // Try to load custom serializer CUOPT_LOG_INFO("[remote_solve] Loading custom serializer from: {}", custom_lib); - // Dynamic loading would go here - // For now, fall back to default - CUOPT_LOG_WARN("[remote_solve] Custom serializer loading not yet implemented, using default"); - instance = get_default_serializer(); + // Open the shared library + void* handle = dlopen(custom_lib, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + CUOPT_LOG_ERROR("[remote_solve] Failed to load serializer library: {}", dlerror()); + instance = get_default_serializer(); + return; + } + + // Look for the factory function + // The function name includes template types for proper linking + std::string factory_name = "create_cuopt_serializer"; + if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f32"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f32"; + } + + using factory_fn_t = std::unique_ptr> (*)(); + auto factory = reinterpret_cast(dlsym(handle, factory_name.c_str())); + + if (!factory) { + CUOPT_LOG_ERROR( + "[remote_solve] Factory function '{}' not found: {}", factory_name, dlerror()); + 
dlclose(handle); + instance = get_default_serializer(); + return; + } + + auto custom_serializer = factory(); + if (custom_serializer) { + CUOPT_LOG_INFO("[remote_solve] Using custom serializer: {}", + custom_serializer->format_name()); + instance = std::move(custom_serializer); + } else { + CUOPT_LOG_ERROR("[remote_solve] Factory returned null, using default"); + dlclose(handle); + instance = get_default_serializer(); + } + // Note: We intentionally don't dlclose(handle) here to keep the library loaded } else { instance = get_default_serializer(); } diff --git a/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt b/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt new file mode 100644 index 000000000..f5f509089 --- /dev/null +++ b/cpp/src/linear_programming/utilities/serializers/CMakeLists.txt @@ -0,0 +1,55 @@ +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on + +# MsgPack Serializer - demonstrates pluggable serialization +# +# This builds a separate shared library that implements the remote_serializer_t interface +# using MsgPack instead of Protocol Buffers. 
+ +find_package(msgpack-cxx QUIET) + +if(msgpack-cxx_FOUND) + message(STATUS "Building msgpack serializer plugin") + + add_library(cuopt_msgpack_serializer SHARED + msgpack_serializer.cpp + ) + + target_include_directories(cuopt_msgpack_serializer + PRIVATE + "${CMAKE_SOURCE_DIR}/include" + "${CMAKE_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_SOURCE_DIR}/src" + ${CUDA_INCLUDE_DIRS} + ) + + # Required for RMM headers + target_compile_definitions(cuopt_msgpack_serializer + PRIVATE + LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE + ) + + target_link_libraries(cuopt_msgpack_serializer + PRIVATE + msgpack-cxx + rmm::rmm + raft::raft + cuopt + ) + + set_target_properties(cuopt_msgpack_serializer PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + ) + + # Install the plugin + install(TARGETS cuopt_msgpack_serializer + COMPONENT runtime + LIBRARY DESTINATION lib + ) +else() + message(STATUS "msgpack-cxx not found, skipping msgpack serializer plugin") +endif() diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp new file mode 100644 index 000000000..56084fa24 --- /dev/null +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -0,0 +1,594 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * MsgPack-based serializer for cuOpt remote solve. + * This demonstrates the pluggable serialization interface. + * + * NOTE: This is a CPU-only serializer. For solutions on GPU memory, it will + * return empty solution vectors. For production use with GPU memory support, + * convert this to a .cu file and use CUDA memory copy operations. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +// Message type identifiers +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; + +template +class msgpack_serializer_t : public remote_serializer_t { + public: + msgpack_serializer_t() { std::cout << "[msgpack_serializer] Initialized\n"; } + + ~msgpack_serializer_t() override = default; + + std::string format_name() const override { return "msgpack"; } + + uint32_t protocol_version() const override { return 1; } + + //============================================================================ + // LP Request Serialization + //============================================================================ + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_uint8(MSG_LP_REQUEST); + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack LP settings + pk.pack_map(4); + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("iteration_limit"); + pk.pack(static_cast(settings.iteration_limit)); + pk.pack("abs_gap_tol"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("rel_gap_tol"); + pk.pack(settings.tolerances.relative_gap_tolerance); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_uint8(MSG_MIP_REQUEST); + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + pk.pack_map(2); + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("mip_gap"); + pk.pack(settings.tolerances.relative_mip_gap); + + 
return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + //============================================================================ + // Solution Deserialization (client-side) + //============================================================================ + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + try { + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size()); + msgpack::object obj = oh.get(); + + if (obj.type != msgpack::type::ARRAY || obj.via.array.size < 6) { + return optimization_problem_solution_t( + cuopt::logic_error("Invalid LP solution format", cuopt::error_type_t::RuntimeError)); + } + + auto& arr = obj.via.array; + auto status = static_cast(arr.ptr[1].as()); + f_t obj_value = arr.ptr[2].as(); + + std::vector primal_sol; + arr.ptr[3].convert(primal_sol); + + std::vector dual_sol; + arr.ptr[4].convert(dual_sol); + + f_t solve_time = arr.ptr[5].as(); + + optimization_problem_solution_t solution(status); + solution.set_primal_objective(obj_value); + solution.set_primal_solution_host(std::move(primal_sol)); + solution.set_dual_solution_host(std::move(dual_sol)); + solution.set_solve_time(solve_time); + + return solution; + } catch (const std::exception& e) { + return optimization_problem_solution_t(cuopt::logic_error( + std::string("MsgPack LP parse error: ") + e.what(), cuopt::error_type_t::RuntimeError)); + } + } + + mip_solution_t deserialize_mip_solution(const std::vector& data) override + { + try { + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size()); + msgpack::object obj = oh.get(); + + if (obj.type != msgpack::type::ARRAY || obj.via.array.size < 6) { + return mip_solution_t( + cuopt::logic_error("Invalid MIP solution format", cuopt::error_type_t::RuntimeError)); + } + + auto& arr = obj.via.array; + auto status = static_cast(arr.ptr[1].as()); + f_t obj_value = arr.ptr[2].as(); + + std::vector sol; + 
arr.ptr[3].convert(sol); + + f_t solve_time = arr.ptr[4].as(); + f_t mip_gap = arr.ptr[5].as(); + + solver_stats_t stats; + stats.total_solve_time = solve_time; + + mip_solution_t solution(std::move(sol), + std::vector{}, + obj_value, + mip_gap, + status, + 0.0, + 0.0, + 0.0, + stats); + + return solution; + } catch (const std::exception& e) { + return mip_solution_t(cuopt::logic_error( + std::string("MsgPack MIP parse error: ") + e.what(), cuopt::error_type_t::RuntimeError)); + } + } + + //============================================================================ + // Server-side Operations + //============================================================================ + + bool is_mip_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { + return oh.get().as() == MSG_MIP_REQUEST; + } + } catch (...) 
{ + } + return false; + } + + bool deserialize_lp_request(const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + try { + size_t offset = 0; + + msgpack::object_handle oh1 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh1.get().as(); + if (msg_type != MSG_LP_REQUEST) return false; + + msgpack::object_handle oh2 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // version = oh2.get().as(); + + unpack_problem(data, offset, mps_data); + + msgpack::object_handle oh_settings = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto settings_map = oh_settings.get().as>(); + + if (settings_map.count("time_limit")) { + settings.time_limit = settings_map["time_limit"].as(); + } + if (settings_map.count("iteration_limit")) { + settings.iteration_limit = settings_map["iteration_limit"].as(); + } + if (settings_map.count("abs_gap_tol")) { + settings.tolerances.absolute_gap_tolerance = settings_map["abs_gap_tol"].as(); + } + if (settings_map.count("rel_gap_tol")) { + settings.tolerances.relative_gap_tolerance = settings_map["rel_gap_tol"].as(); + } + + return true; + } catch (const std::exception& e) { + std::cerr << "[msgpack_serializer] LP request parse error: " << e.what() << "\n"; + return false; + } + } + + bool deserialize_mip_request(const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + try { + size_t offset = 0; + + msgpack::object_handle oh1 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh1.get().as(); + if (msg_type != MSG_MIP_REQUEST) return false; + + msgpack::object_handle oh2 = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + + unpack_problem(data, offset, mps_data); + + msgpack::object_handle oh_settings = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); 
+ auto settings_map = oh_settings.get().as>(); + + if (settings_map.count("time_limit")) { + settings.time_limit = settings_map["time_limit"].as(); + } + if (settings_map.count("mip_gap")) { + settings.tolerances.relative_mip_gap = settings_map["mip_gap"].as(); + } + + return true; + } catch (const std::exception& e) { + std::cerr << "[msgpack_serializer] MIP request parse error: " << e.what() << "\n"; + return false; + } + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Pack as array: [msg_type, status, obj_value, primal_sol, dual_sol, solve_time] + pk.pack_array(6); + pk.pack_uint8(MSG_LP_SOLUTION); + pk.pack(static_cast(solution.get_termination_status())); + pk.pack(static_cast(solution.get_objective_value())); + + // Note: If solution is on GPU, we can't access it from pure C++ code + // For production, this should be a .cu file with CUDA support + if (!solution.is_device_memory()) { + auto primal = solution.get_primal_solution_host(); + pk.pack_array(primal.size()); + for (size_t i = 0; i < primal.size(); ++i) { + pk.pack(static_cast(primal[i])); + } + + auto dual = solution.get_dual_solution_host(); + pk.pack_array(dual.size()); + for (size_t i = 0; i < dual.size(); ++i) { + pk.pack(static_cast(dual[i])); + } + } else { + // GPU memory - return empty arrays (limitation of pure C++ serializer) + pk.pack_array(0); + pk.pack_array(0); + } + + pk.pack(static_cast(solution.get_solve_time())); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_mip_solution(const mip_solution_t& solution) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + pk.pack_array(6); + pk.pack_uint8(MSG_MIP_SOLUTION); + pk.pack(static_cast(solution.get_termination_status())); + pk.pack(static_cast(solution.get_objective_value())); + + if (!solution.is_device_memory()) { + auto sol = 
solution.get_solution_host(); + pk.pack_array(sol.size()); + for (size_t i = 0; i < sol.size(); ++i) { + pk.pack(static_cast(sol[i])); + } + } else { + pk.pack_array(0); + } + + pk.pack(static_cast(solution.get_stats().total_solve_time)); + pk.pack(static_cast(solution.get_mip_gap())); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + //============================================================================ + // Async Operations (simplified for testing) + //============================================================================ + + std::vector serialize_async_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings, + bool blocking) override + { + return serialize_lp_request(view, settings); + } + + std::vector serialize_async_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings, + bool blocking) override + { + return serialize_mip_request(view, settings); + } + + std::vector serialize_status_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_map(1); + pk.pack("job_id"); + pk.pack(job_id); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_get_result_request(const std::string& job_id) override + { + return serialize_status_request(job_id); + } + + std::vector serialize_delete_request(const std::string& job_id) override + { + return serialize_status_request(job_id); + } + + bool deserialize_submit_response(const std::vector& data, + std::string& job_id, + std::string& error_message) override + { + job_id = "msgpack_job"; + return true; + } + + typename remote_serializer_t::job_status_t deserialize_status_response( + const std::vector& data) override + { + return remote_serializer_t::job_status_t::COMPLETED; + } + + optimization_problem_solution_t deserialize_lp_result_response( + const std::vector& data) override + { + return 
deserialize_lp_solution(data); + } + + mip_solution_t deserialize_mip_result_response( + const std::vector& data) override + { + return deserialize_mip_solution(data); + } + + private: + void pack_problem(msgpack::packer& pk, + const mps_parser::data_model_view_t& view) + { + auto offsets_span = view.get_constraint_matrix_offsets(); + auto values_span = view.get_constraint_matrix_values(); + auto obj_span = view.get_objective_coefficients(); + + i_t n_rows = static_cast(offsets_span.size()) - 1; + i_t n_cols = static_cast(obj_span.size()); + i_t nnz = static_cast(values_span.size()); + + pk.pack_map(13); + + pk.pack("n_rows"); + pk.pack(static_cast(n_rows)); + + pk.pack("n_cols"); + pk.pack(static_cast(n_cols)); + + pk.pack("nnz"); + pk.pack(static_cast(nnz)); + + pk.pack("maximize"); + pk.pack(view.get_sense()); + + // CSR matrix + pk.pack("A_values"); + auto A_vals = view.get_constraint_matrix_values(); + pk.pack_array(A_vals.size()); + for (size_t i = 0; i < A_vals.size(); ++i) { + pk.pack(static_cast(A_vals.data()[i])); + } + + pk.pack("A_indices"); + auto A_idx = view.get_constraint_matrix_indices(); + pk.pack_array(A_idx.size()); + for (size_t i = 0; i < A_idx.size(); ++i) { + pk.pack(static_cast(A_idx.data()[i])); + } + + pk.pack("A_offsets"); + auto A_off = view.get_constraint_matrix_offsets(); + pk.pack_array(A_off.size()); + for (size_t i = 0; i < A_off.size(); ++i) { + pk.pack(static_cast(A_off.data()[i])); + } + + pk.pack("obj_coeffs"); + auto obj = view.get_objective_coefficients(); + pk.pack_array(obj.size()); + for (size_t i = 0; i < obj.size(); ++i) { + pk.pack(static_cast(obj.data()[i])); + } + + pk.pack("var_lb"); + auto vlb = view.get_variable_lower_bounds(); + pk.pack_array(vlb.size()); + for (size_t i = 0; i < vlb.size(); ++i) { + pk.pack(static_cast(vlb.data()[i])); + } + + pk.pack("var_ub"); + auto vub = view.get_variable_upper_bounds(); + pk.pack_array(vub.size()); + for (size_t i = 0; i < vub.size(); ++i) { + 
pk.pack(static_cast(vub.data()[i])); + } + + // Constraint bounds - derive from row_types if needed + pk.pack("con_lb"); + auto clb = view.get_constraint_lower_bounds(); + if (clb.size() > 0) { + pk.pack_array(clb.size()); + for (size_t i = 0; i < clb.size(); ++i) { + pk.pack(static_cast(clb.data()[i])); + } + } else { + auto b_span = view.get_constraint_bounds(); + auto rt_span = view.get_row_types(); + pk.pack_array(n_rows); + for (i_t i = 0; i < n_rows; ++i) { + char rt = (rt_span.size() > 0) ? rt_span.data()[i] : 'E'; + f_t b = (b_span.size() > 0) ? b_span.data()[i] : 0; + f_t lb = (rt == 'G' || rt == 'E') ? b : -1e20; + pk.pack(static_cast(lb)); + } + } + + pk.pack("con_ub"); + auto cub = view.get_constraint_upper_bounds(); + if (cub.size() > 0) { + pk.pack_array(cub.size()); + for (size_t i = 0; i < cub.size(); ++i) { + pk.pack(static_cast(cub.data()[i])); + } + } else { + auto b_span = view.get_constraint_bounds(); + auto rt_span = view.get_row_types(); + pk.pack_array(n_rows); + for (i_t i = 0; i < n_rows; ++i) { + char rt = (rt_span.size() > 0) ? rt_span.data()[i] : 'E'; + f_t b = (b_span.size() > 0) ? b_span.data()[i] : 0; + f_t ub = (rt == 'L' || rt == 'E') ? b : 1e20; + pk.pack(static_cast(ub)); + } + } + + // Variable types + pk.pack("var_types"); + auto vt = view.get_variable_types(); + pk.pack_array(n_cols); + for (i_t i = 0; i < n_cols; ++i) { + char t = (vt.size() > 0) ? 
vt.data()[i] : 'C'; + pk.pack(std::string(1, t)); + } + } + + void unpack_problem(const std::vector& data, + size_t& offset, + mps_parser::mps_data_model_t& mps_data) + { + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto problem_map = oh.get().as>(); + + i_t n_cols = problem_map["n_cols"].as(); + + mps_data.set_maximize(problem_map["maximize"].as()); + + // CSR matrix + std::vector A_values; + problem_map["A_values"].convert(A_values); + std::vector A_indices; + problem_map["A_indices"].convert(A_indices); + std::vector A_offsets; + problem_map["A_offsets"].convert(A_offsets); + + mps_data.set_csr_constraint_matrix(A_values.data(), + static_cast(A_values.size()), + A_indices.data(), + static_cast(A_indices.size()), + A_offsets.data(), + static_cast(A_offsets.size())); + + std::vector obj_coeffs; + problem_map["obj_coeffs"].convert(obj_coeffs); + mps_data.set_objective_coefficients(obj_coeffs.data(), static_cast(obj_coeffs.size())); + + std::vector var_lb, var_ub; + problem_map["var_lb"].convert(var_lb); + problem_map["var_ub"].convert(var_ub); + mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); + mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); + + std::vector con_lb, con_ub; + problem_map["con_lb"].convert(con_lb); + problem_map["con_ub"].convert(con_ub); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + + // Variable types + std::vector var_types; + if (problem_map["var_types"].type == msgpack::type::ARRAY) { + auto arr = problem_map["var_types"].via.array; + for (size_t i = 0; i < arr.size; ++i) { + if (arr.ptr[i].type == msgpack::type::STR) { + std::string s; + arr.ptr[i].convert(s); + var_types.push_back(s.empty() ? 
'C' : s[0]); + } else { + var_types.push_back('C'); + } + } + } else { + var_types.resize(n_cols, 'C'); + } + mps_data.set_variable_types(var_types); + } +}; + +} // namespace cuopt::linear_programming + +//============================================================================ +// Factory Functions (exported for dynamic loading) +//============================================================================ + +extern "C" { + +std::unique_ptr> +create_cuopt_serializer_i32_f64() +{ + return std::make_unique>(); +} + +} // extern "C" diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 43f5a0cab..fd8707732 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -407,6 +407,32 @@ i_t mip_solution_t::get_simplex_iterations() const return stats_.num_simplex_iterations; } +template +void mip_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (!is_device_memory_) { + // Already on CPU, nothing to do + return; + } + + // Initialize host storage if needed + if (!solution_host_) { solution_host_ = std::make_unique>(); } + + // Copy solution + if (solution_ && solution_->size() > 0) { + solution_host_->resize(solution_->size()); + raft::copy(solution_host_->data(), solution_->data(), solution_->size(), stream_view.value()); + + // Synchronize to ensure copy is complete + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } + + // Clear GPU storage to free memory + solution_.reset(); + + is_device_memory_ = false; +} + #if MIP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md new file mode 100644 index 000000000..605f73d50 --- /dev/null +++ b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md @@ -0,0 +1,595 @@ +# cuOpt Remote Solve Serialization Plugin Guide + +This guide explains how to develop custom serialization plugins for cuOpt's remote solve feature. 
Plugins allow you to replace the default Protocol Buffers serialization with alternative formats like MsgPack, JSON, FlatBuffers, or custom binary protocols. + +## Overview + +The remote solve feature uses a pluggable serialization interface (`remote_serializer_t`) that handles: +- Serializing optimization problems (LP/MIP) for network transmission +- Deserializing solver settings +- Serializing solutions back to the client +- Message type identification (LP vs MIP) + +``` +┌─────────────┐ ┌─────────────┐ +│ Client │ │ Server │ +│ │ serialize_lp_request() │ │ +│ Problem ───┼──────────────────────────────┼──► Problem │ +│ │ │ │ +│ │ serialize_lp_solution() │ │ +│ Solution ◄─┼──────────────────────────────┼─── Solution │ +└─────────────┘ └─────────────┘ + ▲ ▲ + │ │ + └────────── Same Serializer ───────────────┘ +``` + +**Important**: Both client and server must use the same serializer for communication to work. + +## The Serializer Interface + +Your plugin must implement the `remote_serializer_t` interface defined in: +`cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp` + +### Required Methods + +```cpp +template +class remote_serializer_t { +public: + virtual ~remote_serializer_t() = default; + + // ═══════════════════════════════════════════════════════════════════ + // CLIENT-SIDE: Serialize requests, deserialize solutions + // ═══════════════════════════════════════════════════════════════════ + + // Serialize an LP problem and settings into bytes for transmission + virtual std::vector serialize_lp_request( + const mps_parser::data_model_view_t& problem, + const pdlp_solver_settings_t& settings) = 0; + + // Serialize a MIP problem and settings into bytes + virtual std::vector serialize_mip_request( + const mps_parser::data_model_view_t& problem, + const mip_solver_settings_t& settings) = 0; + + // Deserialize an LP solution from bytes received from server + virtual optimization_problem_solution_t deserialize_lp_solution( + const std::vector& 
data) = 0; + + // Deserialize a MIP solution from bytes + virtual mip_solution_t deserialize_mip_solution( + const std::vector& data) = 0; + + // ═══════════════════════════════════════════════════════════════════ + // SERVER-SIDE: Deserialize requests, serialize solutions + // ═══════════════════════════════════════════════════════════════════ + + // Check if the received data is a MIP request (vs LP) + virtual bool is_mip_request(const std::vector& data) = 0; + + // Deserialize LP request into problem data and settings + virtual bool deserialize_lp_request( + const std::vector& data, + mps_parser::mps_data_model_t& problem_data, + pdlp_solver_settings_t& settings) = 0; + + // Deserialize MIP request into problem data and settings + virtual bool deserialize_mip_request( + const std::vector& data, + mps_parser::mps_data_model_t& problem_data, + mip_solver_settings_t& settings) = 0; + + // Serialize LP solution for transmission back to client + virtual std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) = 0; + + // Serialize MIP solution + virtual std::vector serialize_mip_solution( + const mip_solution_t& solution) = 0; + + // ═══════════════════════════════════════════════════════════════════ + // METADATA + // ═══════════════════════════════════════════════════════════════════ + + // Human-readable format name (e.g., "msgpack", "json", "flatbuffers") + virtual std::string format_name() const = 0; + + // Protocol version for compatibility checking + virtual uint32_t protocol_version() const = 0; +}; +``` + +### Factory Function + +Your plugin must export a factory function that creates the serializer: + +```cpp +extern "C" { + // For int32_t indices, double floats (most common) + std::unique_ptr> + create_cuopt_serializer_i32_f64(); + + // Additional type combinations if needed + std::unique_ptr> + create_cuopt_serializer_i32_f32(); +} +``` + +## Step-by-Step Implementation + +### Step 1: Create the Plugin Source File + +Create 
`cpp/src/linear_programming/utilities/serializers/my_serializer.cpp`: + +```cpp +#include +#include +#include + +namespace cuopt::linear_programming { + +// Message type identifiers (first byte of each message) +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; + +template +class my_serializer_t : public remote_serializer_t { +public: + my_serializer_t() = default; + ~my_serializer_t() override = default; + + std::string format_name() const override { return "my_format"; } + uint32_t protocol_version() const override { return 1; } + + //======================================================================== + // CLIENT-SIDE METHODS + //======================================================================== + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + std::vector buffer; + + // Start with message type + buffer.push_back(MSG_LP_REQUEST); + + // Serialize problem dimensions + i_t n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? view.get_constraint_matrix_offsets().size() - 1 : 0; + i_t n_cols = view.get_objective_coefficients().size(); + i_t nnz = view.get_constraint_matrix_values().size(); + + // ... serialize all problem data ... + // See msgpack_serializer.cpp for complete example + + return buffer; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + std::vector buffer; + buffer.push_back(MSG_MIP_REQUEST); + // ... similar to LP but with MIP settings ... 
+ return buffer; + } + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + // Parse the solution data + // Create and return solution object + + // On error, return error solution: + // return optimization_problem_solution_t( + // cuopt::logic_error("Parse error", cuopt::error_type_t::RuntimeError)); + } + + mip_solution_t deserialize_mip_solution( + const std::vector& data) override + { + // Similar to LP solution + } + + //======================================================================== + // SERVER-SIDE METHODS + //======================================================================== + + bool is_mip_request(const std::vector& data) override + { + if (data.empty()) return false; + return data[0] == MSG_MIP_REQUEST; + } + + bool deserialize_lp_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + try { + // Parse message type + if (data.empty() || data[0] != MSG_LP_REQUEST) return false; + + // Parse problem data and populate mps_data: + // mps_data.set_problem_name("..."); + // mps_data.set_objective_coefficients(coeffs.data(), coeffs.size()); + // mps_data.set_csr_constraint_matrix(...); + // mps_data.set_variable_bounds(...); + // mps_data.set_constraint_bounds(...); + + // Parse settings: + // settings.time_limit = ...; + // settings.iteration_limit = ...; + + return true; + } catch (...) 
{ + return false; + } + } + + bool deserialize_mip_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Similar to LP, also set variable types for integers/binaries: + // mps_data.set_variable_types(var_types); + return true; + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + std::vector buffer; + buffer.push_back(MSG_LP_SOLUTION); + + // NOTE: Server calls solution.to_host() before serialization, + // so solution data is always in CPU memory. Use: + // solution.get_primal_solution_host() + // solution.get_dual_solution_host() + // solution.get_reduced_cost_host() + + // Serialize termination status, objective, solution vectors, etc. + + return buffer; + } + + std::vector serialize_mip_solution( + const mip_solution_t& solution) override + { + std::vector buffer; + buffer.push_back(MSG_MIP_SOLUTION); + + // Use solution.get_solution_host() for the solution vector + + return buffer; + } +}; + +//========================================================================== +// FACTORY FUNCTIONS - Must be exported with C linkage +//========================================================================== + +template +std::unique_ptr> create_serializer_impl() +{ + return std::make_unique>(); +} + +} // namespace cuopt::linear_programming + +// Export factory functions with C linkage for dlopen/dlsym +extern "C" { + +std::unique_ptr> +create_cuopt_serializer_i32_f64() +{ + return cuopt::linear_programming::create_serializer_impl(); +} + +std::unique_ptr> +create_cuopt_serializer_i32_f32() +{ + return cuopt::linear_programming::create_serializer_impl(); +} + +// Add more type combinations as needed + +} +``` + +### Step 2: Create CMakeLists.txt for the Plugin + +Create `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt`: + +```cmake +# Build the custom serializer as a shared library plugin +add_library(cuopt_my_serializer 
SHARED my_serializer.cpp) + +target_link_libraries(cuopt_my_serializer + PRIVATE + cuopt # Link against cuOpt for solution types +) + +target_include_directories(cuopt_my_serializer + PRIVATE + ${CMAKE_SOURCE_DIR}/include +) + +# Set RPATH so the plugin can find libcuopt.so +set_target_properties(cuopt_my_serializer PROPERTIES + INSTALL_RPATH "$ORIGIN" +) + +install(TARGETS cuopt_my_serializer + DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +``` + +### Step 3: Add to Parent CMakeLists.txt + +In `cpp/CMakeLists.txt`, add: + +```cmake +add_subdirectory(src/linear_programming/utilities/serializers) +``` + +### Step 4: Build the Plugin + +```bash +# Build everything including the plugin +./build.sh libcuopt cuopt_remote_server + +# Or just the plugin (after initial build) +cd cpp/build +ninja cuopt_my_serializer +``` + +## Using the Plugin + +### Environment Variable + +Set `CUOPT_SERIALIZER_LIB` to point to your plugin: + +```bash +export CUOPT_SERIALIZER_LIB=/path/to/libcuopt_my_serializer.so +``` + +### Running Server with Custom Serializer + +```bash +# Set the serializer library +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so + +# Start the server +cuopt_remote_server -p 8765 +``` + +Server output will show: +``` +[remote_solve] Loading custom serializer from: /path/to/libcuopt_my_serializer.so +[remote_solve] Using custom serializer: my_format +``` + +### Running Client with Custom Serializer + +```bash +# Same serializer must be used on client side +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so +export CUOPT_REMOTE_HOST=localhost +export CUOPT_REMOTE_PORT=8765 + +# Run cuopt_cli +cuopt_cli problem.mps + +# Or Python +python my_solver_script.py +``` + +### Complete Example Session + +```bash +# Terminal 1: Start server with msgpack serializer +export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so +cuopt_remote_server -p 8765 + +# Terminal 2: Run client with same serializer +export 
CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so +export CUOPT_REMOTE_HOST=localhost +export CUOPT_REMOTE_PORT=8765 +cuopt_cli /path/to/problem.mps +``` + +## Data Model Reference + +### Problem Data (`data_model_view_t`) + +Key getters for serializing problem data: + +```cpp +// Problem metadata +view.get_problem_name() // std::string +view.get_objective_name() // std::string +view.get_sense() // bool (true = maximize) +view.get_objective_scaling_factor() // f_t +view.get_objective_offset() // f_t + +// Constraint matrix (CSR format) +view.get_constraint_matrix_values() // span +view.get_constraint_matrix_indices() // span +view.get_constraint_matrix_offsets() // span + +// Objective and bounds +view.get_objective_coefficients() // span +view.get_variable_lower_bounds() // span +view.get_variable_upper_bounds() // span +view.get_constraint_lower_bounds() // span +view.get_constraint_upper_bounds() // span + +// For MIP problems +view.get_variable_types() // span ('C', 'I', 'B') + +// Names (optional) +view.get_variable_names() // vector +view.get_row_names() // vector +``` + +### Problem Data (`mps_data_model_t`) - Server Side + +Key setters for deserializing: + +```cpp +mps_data.set_problem_name(name); +mps_data.set_objective_name(name); +mps_data.set_maximize(bool); +mps_data.set_objective_scaling_factor(factor); +mps_data.set_objective_offset(offset); + +mps_data.set_objective_coefficients(ptr, size); +mps_data.set_csr_constraint_matrix(values, nvals, indices, nidx, offsets, noff); +mps_data.set_variable_bounds(lower, upper, size); +mps_data.set_constraint_bounds(lower, upper, size); + +// For MIP +mps_data.set_variable_types(std::vector); +``` + +### LP Solution (`optimization_problem_solution_t`) + +```cpp +// Getters (for serialization) +solution.get_termination_status() // pdlp_termination_status_t +solution.get_objective_value() // f_t +solution.get_primal_solution_host() // vector& +solution.get_dual_solution_host() // vector& 
+solution.get_reduced_cost_host() // vector& +solution.get_solve_time() // double +solution.get_l2_primal_residual() // f_t +solution.get_l2_dual_residual() // f_t +solution.get_gap() // f_t +solution.get_nb_iterations() // i_t + +// Setters (for deserialization on client) +solution.set_termination_status(status); +solution.set_objective_value(value); +solution.set_primal_solution_host(vector); +solution.set_dual_solution_host(vector); +solution.set_reduced_cost_host(vector); +solution.set_solve_time(time); +// ... etc +``` + +### MIP Solution (`mip_solution_t`) + +```cpp +// Getters +solution.get_termination_status() // mip_termination_status_t +solution.get_objective_value() // f_t +solution.get_solution_host() // vector& +solution.get_total_solve_time() // double +solution.get_mip_gap() // f_t + +// Setters +solution.set_solution_host(vector); +solution.set_objective_value(value); +solution.set_mip_gap(gap); +// ... etc +``` + +## Tips and Best Practices + +### 1. Message Type Identification + +Always include a message type identifier as the first byte(s): + +```cpp +constexpr uint8_t MSG_LP_REQUEST = 1; +constexpr uint8_t MSG_MIP_REQUEST = 2; +constexpr uint8_t MSG_LP_SOLUTION = 3; +constexpr uint8_t MSG_MIP_SOLUTION = 4; +``` + +### 2. Version Compatibility + +Include a protocol version in your messages for future compatibility: + +```cpp +// In serialize: +buffer.push_back(MSG_LP_REQUEST); +buffer.push_back(PROTOCOL_VERSION); + +// In deserialize: +uint8_t version = data[1]; +if (version != PROTOCOL_VERSION) { + // Handle version mismatch +} +``` + +### 3. Error Handling + +Return proper error solutions on parse failures: + +```cpp +optimization_problem_solution_t deserialize_lp_solution(...) { + try { + // Parse... + } catch (const std::exception& e) { + return optimization_problem_solution_t( + cuopt::logic_error( + std::string("Deserialize error: ") + e.what(), + cuopt::error_type_t::RuntimeError)); + } +} +``` + +### 4. 
Solution Memory + +The server calls `solution.to_host()` before serialization, so: +- Always use `get_*_host()` methods for solution data +- No need to handle GPU memory in your serializer + +### 5. Testing + +Test your serializer with both LP and MIP problems: + +```bash +# LP test +cuopt_cli /path/to/lp_problem.mps + +# MIP test (use a problem with integer variables) +cuopt_cli /path/to/mip_problem.mps +``` + +## Reference Implementation + +See the MsgPack serializer for a complete working example: +- `cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp` +- `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt` + +## Troubleshooting + +### "Failed to load serializer library" + +- Check the path in `CUOPT_SERIALIZER_LIB` is correct +- Ensure the library was built: `ls $CONDA_PREFIX/lib/libcuopt_*serializer.so` + +### "Factory function not found" + +- Ensure factory functions are exported with `extern "C"` +- Check function names match: `create_cuopt_serializer_i32_f64`, etc. 
+ +### "Read failed" / Malformed messages + +- Ensure client and server use the **same** serializer +- Check message framing is consistent +- Verify all required fields are serialized + +### Symbol errors at runtime + +- Rebuild and reinstall with `./build.sh libcuopt cuopt_remote_server` +- Ensure plugin links against `cuopt` From a9b0769b04b3972429593dbc41f74cfb73fbf6de Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 6 Jan 2026 17:22:55 -0500 Subject: [PATCH 13/37] Add async job workflow and file-based log streaming to remote server Features: - Async job management: SUBMIT_JOB, CHECK_STATUS, GET_RESULT, DELETE_RESULT, GET_LOGS - Worker processes with shared memory job queues (fork-based, not threads) - File-based log streaming for async mode (matches Python server architecture) - Per-job log files: /tmp/cuopt_logs/log_{job_id} - Incremental reads via byte offset (frombyte parameter) - Automatic cleanup on job deletion - Sync mode still uses real-time socket streaming - CUOPT_REMOTE_USE_SYNC=1 env var to force sync mode Protocol additions: - RequestType.GET_LOGS for log retrieval - AsyncRequest.frombyte for incremental log reads - LogsResponse with log_lines, nbytes, job_exists Client modes: - Sync (blocking=true): Real-time log streaming over socket - Async (blocking=false): Submit -> Poll (logs + status) -> Get -> Delete --- cpp/cuopt_remote_server.cpp | 915 ++++++++++++++---- .../utilities/remote_serialization.hpp | 135 +++ .../utilities/cuopt_remote.proto | 14 +- .../utilities/protobuf_serializer.cu | 229 +++++ .../utilities/remote_solve.cu | 374 +++++-- 5 files changed, 1427 insertions(+), 240 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 63fbe7680..8ec84364a 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -8,11 +8,20 @@ * @brief Remote solve server with sync and async support using pluggable serialization * * Features: - * - Sync mode: Submit job, wait for result, return 
immediately - * - Async mode: Submit job, get job_id, poll for status, retrieve result + * - Sync mode: Submit job with blocking=true, wait for result, return immediately + * - Async mode: Submit job, get job_id, poll for status, retrieve result, delete * - Uses pluggable serialization (default: Protocol Buffers) - * - Threaded request handling - * - Real-time log streaming to client + * - Worker processes with shared memory job queues + * - Real-time log streaming to client (sync mode only) + * + * Async workflow: + * 1. Client sends SUBMIT_JOB request → Server returns job_id + * 2. Client sends CHECK_STATUS request → Server returns job status + * 3. Client sends GET_RESULT request → Server returns solution + * 4. Client sends DELETE_RESULT request → Server cleans up job + * + * Sync workflow: + * 1. Client sends SUBMIT_JOB with blocking=true → Server solves and returns result directly */ #include @@ -20,8 +29,12 @@ #include #include +#include #include +#include #include +#include +#include #include #include @@ -30,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +54,44 @@ using namespace cuopt::linear_programming; +// ============================================================================ +// Shared Memory Structures (must match between main process and workers) +// ============================================================================ + +constexpr size_t MAX_JOBS = 100; +constexpr size_t MAX_RESULTS = 100; +constexpr size_t MAX_JOB_DATA_SIZE = 64 * 1024 * 1024; // 64MB max problem size +constexpr size_t MAX_RESULT_SIZE = 128 * 1024 * 1024; // 128MB max result size + +struct JobQueueEntry { + char job_id[64]; + uint32_t problem_type; // 0 = LP, 1 = MIP + uint32_t data_size; + uint8_t data[MAX_JOB_DATA_SIZE]; + std::atomic ready; // Job is ready to be processed + std::atomic claimed; // Worker has claimed this job +}; + +struct ResultQueueEntry { + char job_id[64]; + uint32_t status; // 0 = success, 1 = error + 
uint32_t data_size; + uint8_t data[MAX_RESULT_SIZE]; + char error_message[1024]; + std::atomic ready; // Result is ready + std::atomic retrieved; // Result has been retrieved +}; + +// Shared memory control block +struct SharedMemoryControl { + std::atomic shutdown_requested; + std::atomic active_workers; +}; + +// ============================================================================ // Message types for streaming protocol +// ============================================================================ + enum class MessageType : uint8_t { LOG_MESSAGE = 0, // Log output from server SOLUTION = 1, // Final solution data @@ -49,15 +100,11 @@ enum class MessageType : uint8_t { // Helper to send a framed message with type static bool send_typed_message(int sockfd, MessageType type, const void* data, size_t size) { - // Message format: [type:1][size:4][payload:size] uint8_t msg_type = static_cast(type); uint32_t payload_size = static_cast(size); - // Write type if (::write(sockfd, &msg_type, 1) != 1) return false; - // Write size if (::write(sockfd, &payload_size, 4) != 4) return false; - // Write payload if (size > 0) { const uint8_t* ptr = static_cast(data); size_t remaining = size; @@ -71,12 +118,10 @@ static bool send_typed_message(int sockfd, MessageType type, const void* data, s return true; } -/** - * @brief RAII class to redirect stdout to a pipe and stream output to client - * - * This captures all stdout output from the solver and sends it to the client - * in real-time while also echoing to the original stdout (server console). 
- */ +// ============================================================================ +// RAII stdout streamer for log streaming to client +// ============================================================================ + class stdout_streamer_t { public: stdout_streamer_t(int client_fd, bool enabled) @@ -84,17 +129,14 @@ class stdout_streamer_t { { if (!enabled_) return; - // Flush any buffered stdout to prevent old content from being captured fflush(stdout); - // Create pipe if (pipe(pipe_fds_) < 0) { std::cerr << "[Server] Failed to create pipe for stdout streaming\n"; enabled_ = false; return; } - // Save original stdout original_stdout_ = dup(STDOUT_FILENO); if (original_stdout_ < 0) { close(pipe_fds_[0]); @@ -103,7 +145,6 @@ class stdout_streamer_t { return; } - // Redirect stdout to pipe if (dup2(pipe_fds_[1], STDOUT_FILENO) < 0) { close(original_stdout_); close(pipe_fds_[0]); @@ -112,10 +153,8 @@ class stdout_streamer_t { return; } - // Close write end of pipe (stdout now writes to it) close(pipe_fds_[1]); - // Start reader thread running_ = true; reader_thread_ = std::thread(&stdout_streamer_t::reader_loop, this); } @@ -124,18 +163,13 @@ class stdout_streamer_t { { if (!enabled_) return; - // Flush stdout to ensure all output is in the pipe fflush(stdout); - - // Restore original stdout dup2(original_stdout_, STDOUT_FILENO); close(original_stdout_); - // Signal reader to stop and close pipe read end running_ = false; close(pipe_fds_[0]); - // Wait for reader thread if (reader_thread_.joinable()) { reader_thread_.join(); } } @@ -149,10 +183,7 @@ class stdout_streamer_t { buffer[n] = '\0'; - // Echo to original stdout (server console) if (original_stdout_ >= 0) { write(original_stdout_, buffer, n); } - - // Send to client send_typed_message(client_fd_, MessageType::LOG_MESSAGE, buffer, n); } } @@ -165,46 +196,87 @@ class stdout_streamer_t { std::thread reader_thread_; }; -// Job status +// 
============================================================================ +// Job status tracking (main process only) +// ============================================================================ + enum class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; -// Job info for tracking struct JobInfo { std::string job_id; JobStatus status; std::chrono::steady_clock::time_point submit_time; - std::vector request_data; // Stored request - std::vector result_data; // Stored result - bool is_mip; // true for MIP, false for LP + std::vector result_data; + bool is_mip; + std::string error_message; + bool is_blocking; // True if a client is waiting synchronously +}; + +// Per-job condition variable for synchronous waiting +struct JobWaiter { + std::mutex mutex; + std::condition_variable cv; + std::vector result_data; std::string error_message; + bool success; + bool ready; + + JobWaiter() : success(false), ready(false) {} }; +// ============================================================================ // Global state +// ============================================================================ + std::atomic keep_running{true}; std::map job_tracker; std::mutex tracker_mutex; -std::condition_variable job_cv; +std::condition_variable result_cv; // Notified when results arrive + +std::map> waiting_threads; +std::mutex waiters_mutex; + +// Shared memory +JobQueueEntry* job_queue = nullptr; +ResultQueueEntry* result_queue = nullptr; +SharedMemoryControl* shm_ctrl = nullptr; + +// Worker PIDs +std::vector worker_pids; // Server configuration struct ServerConfig { int port = 9090; int num_workers = 1; bool verbose = true; - bool stream_logs = true; // Enable real-time log streaming to clients + bool stream_logs = true; }; ServerConfig config; +// Shared memory names +const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; +const char* SHM_RESULT_QUEUE = "/cuopt_result_queue"; +const char* SHM_CONTROL = "/cuopt_control"; + +// 
============================================================================ +// Signal handling +// ============================================================================ + void signal_handler(int signal) { if (signal == SIGINT || signal == SIGTERM) { std::cout << "\n[Server] Received shutdown signal\n"; keep_running = false; - job_cv.notify_all(); + if (shm_ctrl) { shm_ctrl->shutdown_requested = true; } + result_cv.notify_all(); } } -// Generate unique job ID +// ============================================================================ +// Utilities +// ============================================================================ + std::string generate_job_id() { static std::random_device rd; @@ -217,7 +289,6 @@ std::string generate_job_id() return std::string(buf); } -// Socket helpers static bool write_all(int sockfd, const void* data, size_t size) { const uint8_t* ptr = static_cast(data); @@ -244,16 +315,6 @@ static bool read_all(int sockfd, void* data, size_t size) return true; } -static bool send_response(int sockfd, const std::vector& data) -{ - // Legacy response format (for non-streaming clients) - uint32_t size = static_cast(data.size()); - if (!write_all(sockfd, &size, sizeof(size))) return false; - if (!write_all(sockfd, data.data(), data.size())) return false; - return true; -} - -// Send solution using streaming protocol static bool send_solution_message(int sockfd, const std::vector& data) { return send_typed_message(sockfd, MessageType::SOLUTION, data.data(), data.size()); @@ -264,8 +325,7 @@ static bool receive_request(int sockfd, std::vector& data) uint32_t size; if (!read_all(sockfd, &size, sizeof(size))) return false; - // Sanity check - if (size > 100 * 1024 * 1024) { // Max 100MB + if (size > 100 * 1024 * 1024) { std::cerr << "[Server] Request too large: " << size << " bytes\n"; return false; } @@ -275,10 +335,122 @@ static bool receive_request(int sockfd, std::vector& data) return true; } -// Worker thread - processes jobs from the 
queue -void worker_thread(int worker_id) +// ============================================================================ +// Shared Memory Management +// ============================================================================ + +bool init_shared_memory() { - std::cout << "[Worker " << worker_id << "] Started\n"; + // Create job queue shared memory + int fd_jobs = shm_open(SHM_JOB_QUEUE, O_CREAT | O_RDWR, 0666); + if (fd_jobs < 0) { + std::cerr << "[Server] Failed to create job queue shared memory\n"; + return false; + } + size_t job_queue_size = sizeof(JobQueueEntry) * MAX_JOBS; + if (ftruncate(fd_jobs, job_queue_size) < 0) { + std::cerr << "[Server] Failed to size job queue shared memory\n"; + close(fd_jobs); + return false; + } + job_queue = static_cast( + mmap(nullptr, job_queue_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_jobs, 0)); + close(fd_jobs); + if (job_queue == MAP_FAILED) { + std::cerr << "[Server] Failed to map job queue\n"; + return false; + } + + // Initialize job queue entries + for (size_t i = 0; i < MAX_JOBS; ++i) { + job_queue[i].ready = false; + job_queue[i].claimed = false; + } + + // Create result queue shared memory + int fd_results = shm_open(SHM_RESULT_QUEUE, O_CREAT | O_RDWR, 0666); + if (fd_results < 0) { + std::cerr << "[Server] Failed to create result queue shared memory\n"; + return false; + } + size_t result_queue_size = sizeof(ResultQueueEntry) * MAX_RESULTS; + if (ftruncate(fd_results, result_queue_size) < 0) { + std::cerr << "[Server] Failed to size result queue shared memory\n"; + close(fd_results); + return false; + } + result_queue = static_cast( + mmap(nullptr, result_queue_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_results, 0)); + close(fd_results); + if (result_queue == MAP_FAILED) { + std::cerr << "[Server] Failed to map result queue\n"; + return false; + } + + // Initialize result queue entries + for (size_t i = 0; i < MAX_RESULTS; ++i) { + result_queue[i].ready = false; + result_queue[i].retrieved = false; + } + + 
// Create control shared memory + int fd_ctrl = shm_open(SHM_CONTROL, O_CREAT | O_RDWR, 0666); + if (fd_ctrl < 0) { + std::cerr << "[Server] Failed to create control shared memory\n"; + return false; + } + if (ftruncate(fd_ctrl, sizeof(SharedMemoryControl)) < 0) { + std::cerr << "[Server] Failed to size control shared memory\n"; + close(fd_ctrl); + return false; + } + shm_ctrl = static_cast( + mmap(nullptr, sizeof(SharedMemoryControl), PROT_READ | PROT_WRITE, MAP_SHARED, fd_ctrl, 0)); + close(fd_ctrl); + if (shm_ctrl == MAP_FAILED) { + std::cerr << "[Server] Failed to map control\n"; + return false; + } + + shm_ctrl->shutdown_requested = false; + shm_ctrl->active_workers = 0; + + return true; +} + +void cleanup_shared_memory() +{ + if (job_queue) { + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + shm_unlink(SHM_JOB_QUEUE); + } + if (result_queue) { + munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); + shm_unlink(SHM_RESULT_QUEUE); + } + if (shm_ctrl) { + munmap(shm_ctrl, sizeof(SharedMemoryControl)); + shm_unlink(SHM_CONTROL); + } +} + +// ============================================================================ +// Forward declarations for log file management +// ============================================================================ +std::string get_log_file_path(const std::string& job_id); +void ensure_log_dir_exists(); +void delete_log_file(const std::string& job_id); + +// ============================================================================ +// Worker Process +// ============================================================================ + +void worker_process(int worker_id) +{ + std::cout << "[Worker " << worker_id << "] Started (PID: " << getpid() << ")\n"; + + // Increment active worker count + shm_ctrl->active_workers++; // Create RAFT handle for GPU operations raft::handle_t handle; @@ -286,104 +458,69 @@ void worker_thread(int worker_id) // Get serializer auto serializer = get_serializer(); - while (keep_running) { - 
std::string job_id; - std::vector request_data; - bool is_mip = false; - - // Find a queued job - { - std::unique_lock lock(tracker_mutex); - job_cv.wait(lock, []() { - if (!keep_running) return true; - for (const auto& [id, info] : job_tracker) { - if (info.status == JobStatus::QUEUED) return true; - } - return false; - }); - - if (!keep_running) break; - - // Find and claim a job - for (auto& [id, info] : job_tracker) { - if (info.status == JobStatus::QUEUED) { - info.status = JobStatus::PROCESSING; - job_id = id; - request_data = info.request_data; - is_mip = info.is_mip; + while (!shm_ctrl->shutdown_requested) { + // Find a job to process + int job_slot = -1; + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && !job_queue[i].claimed) { + // Try to claim this job atomically + bool expected = false; + if (job_queue[i].claimed.compare_exchange_strong(expected, true)) { + job_slot = i; break; } } } - if (job_id.empty()) continue; + if (job_slot < 0) { + // No job available, sleep briefly + usleep(10000); // 10ms + continue; + } - if (config.verbose) { - std::cout << "[Worker " << worker_id << "] Processing job: " << job_id - << " (type: " << (is_mip ? "MIP" : "LP") << ")\n"; + // Process the job + JobQueueEntry& job = job_queue[job_slot]; + std::string job_id(job.job_id); + bool is_mip = (job.problem_type == 1); + + std::cout << "[Worker " << worker_id << "] Processing job: " << job_id + << " (type: " << (is_mip ? 
"MIP" : "LP") << ")\n"; + std::cout.flush(); + + // Redirect stdout to per-job log file for client log retrieval + std::string log_file = get_log_file_path(job_id); + int saved_stdout = dup(STDOUT_FILENO); + int log_fd = open(log_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (log_fd >= 0) { + dup2(log_fd, STDOUT_FILENO); + close(log_fd); } + std::vector request_data(job.data, job.data + job.data_size); std::vector result_data; std::string error_message; bool success = false; try { if (is_mip) { - // Deserialize MIP request cuopt::mps_parser::mps_data_model_t mps_data; mip_solver_settings_t settings; if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { - // Solve using the data model directly auto solution = solve_mip(&handle, mps_data, settings); - - // Move solution to CPU memory for serialization - // (remote solve clients are typically CPU-only hosts) solution.to_host(handle.get_stream()); - - // Serialize result result_data = serializer->serialize_mip_solution(solution); success = true; } else { error_message = "Failed to deserialize MIP request"; } } else { - // Deserialize LP request cuopt::mps_parser::mps_data_model_t mps_data; pdlp_solver_settings_t settings; if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { - // Debug: print deserialized data - std::cout << "[Server DEBUG] Deserialized LP problem:\n"; - std::cout << " Maximize: " << mps_data.get_sense() << "\n"; - std::cout << " Objective coeffs: ["; - for (size_t i = 0; i < mps_data.get_objective_coefficients().size(); ++i) { - std::cout << mps_data.get_objective_coefficients()[i]; - if (i + 1 < mps_data.get_objective_coefficients().size()) std::cout << ", "; - } - std::cout << "]\n"; - std::cout << " Constraint lower bounds: ["; - for (size_t i = 0; i < mps_data.get_constraint_lower_bounds().size(); ++i) { - std::cout << mps_data.get_constraint_lower_bounds()[i]; - if (i + 1 < mps_data.get_constraint_lower_bounds().size()) std::cout << ", "; - } 
- std::cout << "]\n"; - std::cout << " Constraint upper bounds: ["; - for (size_t i = 0; i < mps_data.get_constraint_upper_bounds().size(); ++i) { - std::cout << mps_data.get_constraint_upper_bounds()[i]; - if (i + 1 < mps_data.get_constraint_upper_bounds().size()) std::cout << ", "; - } - std::cout << "]\n"; - std::cout.flush(); - - // Solve using the data model directly auto solution = solve_lp(&handle, mps_data, settings); - - // Move solution to CPU memory for serialization - // (remote solve clients are typically CPU-only hosts) solution.to_host(handle.get_stream()); - - // Serialize result result_data = serializer->serialize_lp_solution(solution); success = true; } else { @@ -394,60 +531,300 @@ void worker_thread(int worker_id) error_message = std::string("Exception: ") + e.what(); } - // Update job status - { - std::lock_guard lock(tracker_mutex); - auto it = job_tracker.find(job_id); - if (it != job_tracker.end()) { + // Restore stdout to console + fflush(stdout); + dup2(saved_stdout, STDOUT_FILENO); + close(saved_stdout); + + // Store result in result queue + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + bool expected = false; + // Claim this result slot + ResultQueueEntry& result = result_queue[i]; + strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); + result.status = success ? 
0 : 1; if (success) { - it->second.status = JobStatus::COMPLETED; - it->second.result_data = std::move(result_data); + result.data_size = std::min(result_data.size(), MAX_RESULT_SIZE); + memcpy(result.data, result_data.data(), result.data_size); } else { - it->second.status = JobStatus::FAILED; - it->second.error_message = error_message; + strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); + result.data_size = 0; } + result.retrieved = false; + result.ready = true; // Mark as ready last + break; } } - job_cv.notify_all(); - if (config.verbose) { - std::cout << "[Worker " << worker_id << "] Completed job: " << job_id - << " (success: " << success << ")\n"; - } + // Clear job slot + job.ready = false; + job.claimed = false; + + std::cout << "[Worker " << worker_id << "] Completed job: " << job_id + << " (success: " << success << ")\n"; } + shm_ctrl->active_workers--; std::cout << "[Worker " << worker_id << "] Stopped\n"; + _exit(0); } -// Handle client connection with streaming log support -void handle_client(int client_fd, bool stream_logs) +void spawn_workers() { - auto serializer = get_serializer(); + for (int i = 0; i < config.num_workers; ++i) { + pid_t pid = fork(); + if (pid < 0) { + std::cerr << "[Server] Failed to fork worker " << i << "\n"; + } else if (pid == 0) { + // Child process + worker_process(i); + _exit(0); // Should not reach here + } else { + // Parent process + worker_pids.push_back(pid); + } + } +} - // Receive request - std::vector request_data; - if (!receive_request(client_fd, request_data)) { - std::cerr << "[Server] Failed to receive request\n"; - close(client_fd); - return; +void wait_for_workers() +{ + for (pid_t pid : worker_pids) { + int status; + waitpid(pid, &status, 0); } + worker_pids.clear(); +} - if (config.verbose) { - std::cout << "[Server] Received request, size: " << request_data.size() << " bytes\n"; +// ============================================================================ +// 
Result Retrieval Thread (main process) +// ============================================================================ + +void result_retrieval_thread() +{ + std::cout << "[Server] Result retrieval thread started\n"; + + while (keep_running) { + bool found = false; + + // Check for completed results + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (result_queue[i].ready && !result_queue[i].retrieved) { + std::string job_id(result_queue[i].job_id); + bool success = (result_queue[i].status == 0); + + std::vector result_data; + std::string error_message; + + if (success) { + result_data.assign(result_queue[i].data, + result_queue[i].data + result_queue[i].data_size); + } else { + error_message = result_queue[i].error_message; + } + + // Check if there's a blocking waiter + { + std::lock_guard lock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + // Wake up the waiting thread + auto waiter = wit->second; + waiter->result_data = std::move(result_data); + waiter->error_message = error_message; + waiter->success = success; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + // Update job tracker + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + if (success) { + it->second.status = JobStatus::COMPLETED; + it->second.result_data = result_data; + } else { + it->second.status = JobStatus::FAILED; + it->second.error_message = error_message; + } + } + } + + result_queue[i].retrieved = true; + result_queue[i].ready = false; // Free slot + found = true; + } + } + + if (!found) { + usleep(10000); // 10ms + } + + result_cv.notify_all(); + } + + std::cout << "[Server] Result retrieval thread stopped\n"; +} + +// ============================================================================ +// Async Request Handlers +// ============================================================================ + +// Submit a job asynchronously (returns job_id) +std::pair 
submit_job_async(const std::vector& request_data, bool is_mip) +{ + std::string job_id = generate_job_id(); + + if (request_data.size() > MAX_JOB_DATA_SIZE) { return {false, "Request data too large"}; } + + // Find free job slot + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (!job_queue[i].ready && !job_queue[i].claimed) { + strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1); + job_queue[i].problem_type = is_mip ? 1 : 0; + job_queue[i].data_size = request_data.size(); + memcpy(job_queue[i].data, request_data.data(), request_data.size()); + job_queue[i].claimed = false; + job_queue[i].ready = true; // Mark as ready last + + // Track job + { + std::lock_guard lock(tracker_mutex); + JobInfo info; + info.job_id = job_id; + info.status = JobStatus::QUEUED; + info.submit_time = std::chrono::steady_clock::now(); + info.is_mip = is_mip; + info.is_blocking = false; + job_tracker[job_id] = info; + } + + if (config.verbose) { std::cout << "[Server] Job submitted (async): " << job_id << "\n"; } + + return {true, job_id}; + } + } + + return {false, "Job queue full"}; +} + +// Check job status +JobStatus check_job_status(const std::string& job_id, std::string& message) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { + message = "Job ID not found"; + return JobStatus::NOT_FOUND; + } + + switch (it->second.status) { + case JobStatus::QUEUED: message = "Job is queued"; break; + case JobStatus::PROCESSING: message = "Job is being processed"; break; + case JobStatus::COMPLETED: message = "Job completed"; break; + case JobStatus::FAILED: message = "Job failed: " + it->second.error_message; break; + default: message = "Unknown status"; + } + + return it->second.status; +} + +// Get job result +bool get_job_result(const std::string& job_id, + std::vector& result_data, + std::string& error_message) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == 
job_tracker.end()) { + error_message = "Job ID not found"; + return false; + } + + if (it->second.status == JobStatus::COMPLETED) { + result_data = it->second.result_data; + return true; + } else if (it->second.status == JobStatus::FAILED) { + error_message = it->second.error_message; + return false; + } else { + error_message = "Job not completed yet"; + return false; } +} - // Determine request type - bool is_mip = serializer->is_mip_request(request_data); +// ============================================================================ +// Log File Management +// ============================================================================ + +// Directory for per-job log files +const std::string LOG_DIR = "/tmp/cuopt_logs"; + +// Get the log file path for a given job_id +std::string get_log_file_path(const std::string& job_id) { return LOG_DIR + "/log_" + job_id; } + +// Ensure log directory exists +void ensure_log_dir_exists() +{ + struct stat st; + if (stat(LOG_DIR.c_str(), &st) != 0) { mkdir(LOG_DIR.c_str(), 0755); } +} + +// Delete log file for a job +void delete_log_file(const std::string& job_id) +{ + std::string log_file = get_log_file_path(job_id); + unlink(log_file.c_str()); // Ignore errors if file doesn't exist +} - // For sync mode with streaming, process directly in this thread - // to enable log streaming to the client +// Delete job +bool delete_job(const std::string& job_id) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { return false; } + + job_tracker.erase(it); + + // Also delete the log file + delete_log_file(job_id); + + if (config.verbose) { std::cout << "[Server] Job deleted: " << job_id << "\n"; } + + return true; +} + +// ============================================================================ +// Sync Mode Handler (with log streaming) +// ============================================================================ + +/** + * @brief Handle synchronous (blocking) solve 
requests directly. + * + * For sync mode, we solve directly in the main thread instead of using worker + * processes. This allows stdout log streaming to work correctly since the + * stdout_streamer_t captures output from the same process. + */ +void handle_sync_solve(int client_fd, + const std::vector& request_data, + bool is_mip, + bool stream_logs) +{ std::string job_id = generate_job_id(); if (config.verbose) { - std::cout << "[Server] Processing job: " << job_id << " (type: " << (is_mip ? "MIP" : "LP") - << ", streaming: " << (stream_logs ? "yes" : "no") << ")\n"; + std::cout << "[Server] Sync solve request, job_id: " << job_id + << " (streaming: " << (stream_logs ? "yes" : "no") << ")\n"; } + auto serializer = get_serializer(); + // Create RAFT handle for GPU operations raft::handle_t handle; @@ -467,8 +844,9 @@ void handle_client(int client_fd, bool stream_logs) if (serializer->deserialize_mip_request(request_data, mps_data, settings)) { auto solution = solve_mip(&handle, mps_data, settings); - result_data = serializer->serialize_mip_solution(solution); - success = true; + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_mip_solution(solution); + success = true; } else { error_message = "Failed to deserialize MIP request"; } @@ -478,8 +856,9 @@ void handle_client(int client_fd, bool stream_logs) if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { auto solution = solve_lp(&handle, mps_data, settings); - result_data = serializer->serialize_lp_solution(solution); - success = true; + solution.to_host(handle.get_stream()); + result_data = serializer->serialize_lp_solution(solution); + success = true; } else { error_message = "Failed to deserialize LP request"; } @@ -490,35 +869,177 @@ void handle_client(int client_fd, bool stream_logs) } // streamer destructor restores stdout if (config.verbose) { - std::cout << "[Server] Completed job: " << job_id << " (success: " << success << ")\n"; + std::cout << "[Server] 
Sync solve completed: " << job_id << " (success: " << success << ")\n"; } - // Send response - always use streaming protocol for consistency - // (stream_logs only controls whether LOG_MESSAGE are sent during solve) - std::cout << "[Server] Sending solution message, size = " << result_data.size() << " bytes\n"; - std::cout.flush(); - if (!send_solution_message(client_fd, result_data)) { - std::cerr << "[Server] Failed to send solution message\n"; + // Send result to client + if (success) { + std::cout << "[Server] Sending solution message, size = " << result_data.size() << " bytes\n"; + send_solution_message(client_fd, result_data); } else { - std::cout << "[Server] Solution message sent successfully\n"; - std::cout.flush(); + std::cerr << "[Server] Sync solve failed: " << error_message << "\n"; + // Send empty solution to indicate failure + std::vector empty; + send_solution_message(client_fd, empty); } close(client_fd); } -// Legacy handle_client without streaming (backward compatible) -void handle_client(int client_fd) { handle_client(client_fd, false); } +// ============================================================================ +// Client Connection Handler +// ============================================================================ + +void handle_client(int client_fd, bool stream_logs) +{ + auto serializer = get_serializer(); + + // Receive request + std::vector request_data; + if (!receive_request(client_fd, request_data)) { + std::cerr << "[Server] Failed to receive request\n"; + close(client_fd); + return; + } + + if (config.verbose) { + std::cout << "[Server] Received request, size: " << request_data.size() << " bytes\n"; + } + + // Determine if this is an async protocol request + bool is_async_request = serializer->is_async_request(request_data); + + if (is_async_request) { + // Parse async request type and handle accordingly + auto request_type = serializer->get_async_request_type(request_data); + + if (request_type == 0) { // SUBMIT_JOB + bool 
blocking = serializer->is_blocking_request(request_data); + bool is_mip = serializer->is_mip_request(request_data); + + // Extract the actual problem data from the async request + std::vector problem_data = serializer->extract_problem_data(request_data); + + if (blocking) { + // Sync mode - handle with log streaming + handle_sync_solve(client_fd, problem_data, is_mip, stream_logs); + return; + } else { + // Async mode - submit and return job_id + auto [success, result] = submit_job_async(problem_data, is_mip); + auto response = serializer->serialize_submit_response(success, result); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } + } else if (request_type == 1) { // CHECK_STATUS + std::string job_id = serializer->get_job_id(request_data); + std::string message; + JobStatus status = check_job_status(job_id, message); + + int status_code = 0; + switch (status) { + case JobStatus::QUEUED: status_code = 0; break; + case JobStatus::PROCESSING: status_code = 1; break; + case JobStatus::COMPLETED: status_code = 2; break; + case JobStatus::FAILED: status_code = 3; break; + case JobStatus::NOT_FOUND: status_code = 4; break; + } + + auto response = serializer->serialize_status_response(status_code, message); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 2) { // GET_RESULT + std::string job_id = serializer->get_job_id(request_data); + std::vector result_data; + std::string error_message; + + bool success = get_job_result(job_id, result_data, error_message); + auto response = serializer->serialize_result_response(success, result_data, error_message); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 3) { // DELETE_RESULT + std::string job_id = 
serializer->get_job_id(request_data); + bool success = delete_job(job_id); + + auto response = serializer->serialize_delete_response(success); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (request_type == 4) { // GET_LOGS + std::string job_id = serializer->get_job_id(request_data); + int64_t frombyte = serializer->get_frombyte(request_data); + + std::vector log_lines; + int64_t nbytes = 0; + bool job_exists = false; + + // Read logs from file + std::string log_file = get_log_file_path(job_id); + std::ifstream ifs(log_file); + if (ifs.is_open()) { + job_exists = true; + ifs.seekg(frombyte); + std::string line; + while (std::getline(ifs, line)) { + log_lines.push_back(line); + } + nbytes = ifs.tellg(); + if (nbytes < 0) { + // tellg returns -1 at EOF, get actual file size + ifs.clear(); + ifs.seekg(0, std::ios::end); + nbytes = ifs.tellg(); + } + ifs.close(); + } else { + // Check if job exists but log file doesn't (not started yet) + std::lock_guard lock(tracker_mutex); + job_exists = (job_tracker.find(job_id) != job_tracker.end()); + } + + auto response = serializer->serialize_logs_response(job_id, log_lines, nbytes, job_exists); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] GET_LOGS: job=" << job_id << ", frombyte=" << frombyte + << ", lines=" << log_lines.size() << ", nbytes=" << nbytes << "\n"; + } + } + + close(client_fd); + } else { + // Legacy/simple request format - treat as sync LP/MIP request + bool is_mip = serializer->is_mip_request(request_data); + handle_sync_solve(client_fd, request_data, is_mip, stream_logs); + } +} + +// ============================================================================ +// Main +// ============================================================================ void 
print_usage(const char* prog) { std::cout << "Usage: " << prog << " [options]\n" << "Options:\n" << " -p PORT Port to listen on (default: 9090)\n" - << " -w NUM Number of worker threads (default: 1)\n" + << " -w NUM Number of worker processes (default: 1)\n" << " -q Quiet mode (less verbose output)\n" << " --no-stream Disable real-time log streaming to clients\n" - << " -h Show this help\n"; + << " -h Show this help\n" + << "\n" + << "Environment Variables (client-side):\n" + << " CUOPT_REMOTE_USE_SYNC=1 Force sync mode (default is async)\n"; } int main(int argc, char** argv) @@ -544,33 +1065,47 @@ int main(int argc, char** argv) signal(SIGTERM, signal_handler); // IMPORTANT: Clear remote solve environment variables to prevent infinite recursion - // The server should always do local solves, never try to connect to itself unsetenv("CUOPT_REMOTE_HOST"); unsetenv("CUOPT_REMOTE_PORT"); - std::cout << "=== cuOpt Remote Solve Server ===\n"; + // Ensure log directory exists for per-job log files + ensure_log_dir_exists(); + + std::cout << "=== cuOpt Remote Solve Server (Async) ===\n"; std::cout << "Port: " << config.port << "\n"; - std::cout << "Workers: " << config.num_workers << "\n"; + std::cout << "Workers: " << config.num_workers << " (processes)\n"; std::cout << "Log streaming: " << (config.stream_logs ? 
"enabled" : "disabled") << "\n"; - - // Start worker threads - std::vector workers; - for (int i = 0; i < config.num_workers; ++i) { - workers.emplace_back(worker_thread, i); + std::cout << "\n"; + std::cout << "Async API:\n"; + std::cout << " SUBMIT_JOB - Submit a job, get job_id\n"; + std::cout << " CHECK_STATUS - Check job status\n"; + std::cout << " GET_RESULT - Retrieve completed result\n"; + std::cout << " DELETE_RESULT - Delete job from server\n"; + std::cout << "\n"; + + // Initialize shared memory + if (!init_shared_memory()) { + std::cerr << "[Server] Failed to initialize shared memory\n"; + return 1; } + // Spawn worker processes + spawn_workers(); + + // Start result retrieval thread + std::thread result_thread(result_retrieval_thread); + // Create server socket int server_fd = socket(AF_INET, SOCK_STREAM, 0); if (server_fd < 0) { std::cerr << "[Server] Failed to create socket\n"; + cleanup_shared_memory(); return 1; } - // Allow address reuse int opt = 1; setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - // Bind struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; @@ -580,24 +1115,27 @@ int main(int argc, char** argv) if (bind(server_fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { std::cerr << "[Server] Failed to bind to port " << config.port << "\n"; close(server_fd); + cleanup_shared_memory(); return 1; } - // Listen if (listen(server_fd, 10) < 0) { std::cerr << "[Server] Failed to listen\n"; close(server_fd); + cleanup_shared_memory(); return 1; } std::cout << "[Server] Listening on port " << config.port << "\n"; + // Flush stdout before accept loop + std::cout.flush(); + // Accept connections while (keep_running) { struct sockaddr_in client_addr; socklen_t client_len = sizeof(client_addr); - // Use select for timeout so we can check keep_running fd_set read_fds; FD_ZERO(&read_fds); FD_SET(server_fd, &read_fds); @@ -612,7 +1150,7 @@ int main(int argc, char** argv) std::cerr << "[Server] Select 
error\n"; break; } - if (ready == 0) continue; // Timeout + if (ready == 0) continue; int client_fd = accept(server_fd, (struct sockaddr*)&client_addr, &client_len); if (client_fd < 0) { @@ -627,7 +1165,7 @@ int main(int argc, char** argv) std::cout << "[Server] Connection from " << client_ip << "\n"; } - // Handle in separate thread with streaming based on config + // Handle client in separate thread std::thread([client_fd]() { handle_client(client_fd, config.stream_logs); }).detach(); } @@ -635,11 +1173,18 @@ int main(int argc, char** argv) std::cout << "[Server] Shutting down...\n"; close(server_fd); + // Signal workers to stop + if (shm_ctrl) { shm_ctrl->shutdown_requested = true; } + + // Wait for result retrieval thread + result_cv.notify_all(); + if (result_thread.joinable()) { result_thread.join(); } + // Wait for workers - job_cv.notify_all(); - for (auto& w : workers) { - if (w.joinable()) w.join(); - } + wait_for_workers(); + + // Cleanup + cleanup_shared_memory(); std::cout << "[Server] Stopped\n"; return 0; diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp index b74fdb35c..ff28e04bc 100644 --- a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -195,11 +195,30 @@ class remote_serializer_t { */ virtual std::vector serialize_delete_request(const std::string& job_id) = 0; + /** + * @brief Serialize a get logs request. + * + * @param job_id The job ID to get logs for + * @param frombyte Byte offset to start reading from (0 = beginning) + * @return Serialized request bytes + */ + virtual std::vector serialize_get_logs_request(const std::string& job_id, + int64_t frombyte = 0) = 0; + /** * @brief Job status enumeration. 
*/ enum class job_status_t { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; + /** + * @brief Structure to hold log retrieval results. + */ + struct logs_result_t { + std::vector log_lines; ///< Log lines read from file + int64_t nbytes; ///< Ending byte position (use as frombyte next time) + bool job_exists; ///< False if job_id not found + }; + /** * @brief Deserialize job submission response. * @@ -238,6 +257,122 @@ class remote_serializer_t { virtual mip_solution_t deserialize_mip_result_response( const std::vector& data) = 0; + /** + * @brief Deserialize logs response. + * + * @param data Response bytes + * @return Logs result structure + */ + virtual logs_result_t deserialize_logs_response(const std::vector& data) = 0; + + //============================================================================ + // Server-side Async Request Handling + //============================================================================ + + /** + * @brief Check if serialized data is an async protocol request. + * + * Async requests contain RequestType field (SUBMIT_JOB, CHECK_STATUS, etc.) + * + * @param data The serialized request bytes + * @return true if this is an async protocol request + */ + virtual bool is_async_request(const std::vector& data) = 0; + + /** + * @brief Get the async request type. + * + * @param data The serialized request bytes + * @return Request type: 0=SUBMIT_JOB, 1=CHECK_STATUS, 2=GET_RESULT, 3=DELETE_RESULT + */ + virtual int get_async_request_type(const std::vector& data) = 0; + + /** + * @brief Check if async request has blocking flag set. + * + * @param data The serialized request bytes + * @return true if blocking mode is requested + */ + virtual bool is_blocking_request(const std::vector& data) = 0; + + /** + * @brief Extract problem data from an async SUBMIT_JOB request. 
+ * + * @param data The serialized async request bytes + * @return The extracted problem data (LP or MIP request) + */ + virtual std::vector extract_problem_data(const std::vector& data) = 0; + + /** + * @brief Get job_id from async request (for CHECK_STATUS, GET_RESULT, DELETE_RESULT, GET_LOGS). + * + * @param data The serialized request bytes + * @return The job ID string + */ + virtual std::string get_job_id(const std::vector& data) = 0; + + /** + * @brief Get frombyte from GET_LOGS request. + * + * @param data The serialized request bytes + * @return The byte offset to start reading from + */ + virtual int64_t get_frombyte(const std::vector& data) = 0; + + /** + * @brief Serialize a job submission response. + * + * @param success Whether submission succeeded + * @param result On success: job_id, on failure: error message + * @return Serialized response bytes + */ + virtual std::vector serialize_submit_response(bool success, + const std::string& result) = 0; + + /** + * @brief Serialize a status check response. + * + * @param status_code Job status: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND + * @param message Status message + * @return Serialized response bytes + */ + virtual std::vector serialize_status_response(int status_code, + const std::string& message) = 0; + + /** + * @brief Serialize a get result response. + * + * @param success Whether result retrieval succeeded + * @param result_data The solution data (if success) + * @param error_message Error message (if failure) + * @return Serialized response bytes + */ + virtual std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message) = 0; + + /** + * @brief Serialize a delete response. + * + * @param success Whether deletion succeeded + * @return Serialized response bytes + */ + virtual std::vector serialize_delete_response(bool success) = 0; + + /** + * @brief Serialize a logs response. 
+ * + * @param job_id The job ID + * @param log_lines Log lines read from file + * @param nbytes Ending byte position in log file + * @param job_exists False if job_id not found + * @return Serialized response bytes + */ + virtual std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t nbytes, + bool job_exists) = 0; + //============================================================================ // Metadata //============================================================================ diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto index faf93e094..35b7b0584 100644 --- a/cpp/src/linear_programming/utilities/cuopt_remote.proto +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -202,6 +202,7 @@ enum RequestType { CHECK_STATUS = 1; // Check job status GET_RESULT = 2; // Retrieve completed result DELETE_RESULT = 3; // Delete result from server + GET_LOGS = 4; // Retrieve buffered log entries } // Job status for async operations @@ -216,8 +217,9 @@ enum JobStatus { // Generic request wrapper for async operations message AsyncRequest { RequestType request_type = 1; - string job_id = 2; // For status/get/delete operations + string job_id = 2; // For status/get/delete/get_logs operations bool blocking = 3; // If true, server waits and returns solution (sync mode) + int64 frombyte = 4; // For GET_LOGS: byte offset to start reading from // For SUBMIT_JOB requests oneof job_data { @@ -257,6 +259,15 @@ message DeleteResponse { string message = 2; } +// Response for log retrieval (file-based, like Python server) +message LogsResponse { + ResponseStatus status = 1; + string job_id = 2; + repeated string log_lines = 3; // Log lines read from file + int64 nbytes = 4; // Ending byte position (use as frombyte in next request) + bool job_exists = 5; // False if job_id not found +} + // Generic response wrapper message AsyncResponse { 
RequestType request_type = 1; @@ -266,6 +277,7 @@ message AsyncResponse { StatusResponse status_response = 11; ResultResponse result_response = 12; DeleteResponse delete_response = 13; + LogsResponse logs_response = 14; } } diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 9a81762be..058a30b13 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -369,6 +369,19 @@ class protobuf_serializer_t : public remote_serializer_t { return result; } + std::vector serialize_get_logs_request(const std::string& job_id, + int64_t frombyte) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::GET_LOGS); + request.set_job_id(job_id); + request.set_frombyte(frombyte); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + bool deserialize_submit_response(const std::vector& data, std::string& job_id, std::string& error_message) override @@ -467,6 +480,222 @@ class protobuf_serializer_t : public remote_serializer_t { return proto_to_mip_solution(result.mip_solution()); } + typename remote_serializer_t::logs_result_t deserialize_logs_response( + const std::vector& data) override + { + typename remote_serializer_t::logs_result_t result; + result.nbytes = 0; + result.job_exists = false; + + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_logs_response()) { + return result; + } + + const auto& logs = response.logs_response(); + result.job_exists = logs.job_exists(); + result.nbytes = logs.nbytes(); + + result.log_lines.reserve(logs.log_lines_size()); + for (int i = 0; i < logs.log_lines_size(); ++i) { + result.log_lines.push_back(logs.log_lines(i)); + } + + return result; + } + + 
//============================================================================ + // Server-side Async Request Handling + //============================================================================ + + bool is_async_request(const std::vector& data) override + { + // An AsyncRequest is characterized by having the request_type field set + // and containing either lp_request or mip_request. + // We can detect it by checking if it parses as AsyncRequest AND has a job_data set. + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return false; } + + // AsyncRequest must have either lp_request or mip_request set + // (the job_data oneof). If neither is set, it's not an async request + // or it's a status/result/delete request that has job_id instead. + bool has_job_data = request.has_lp_request() || request.has_mip_request(); + bool has_job_id = !request.job_id().empty(); + + // It's an async request if it has job_data OR job_id (for non-submit requests) + return has_job_data || has_job_id; + } + + int get_async_request_type(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return -1; } + + switch (request.request_type()) { + case cuopt::remote::SUBMIT_JOB: return 0; + case cuopt::remote::CHECK_STATUS: return 1; + case cuopt::remote::GET_RESULT: return 2; + case cuopt::remote::DELETE_RESULT: return 3; + case cuopt::remote::GET_LOGS: return 4; + default: return -1; + } + } + + bool is_blocking_request(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return false; } + return request.blocking(); + } + + std::vector extract_problem_data(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return {}; } + + std::string serialized; + if (request.has_lp_request()) { + serialized = 
request.lp_request().SerializeAsString(); + } else if (request.has_mip_request()) { + serialized = request.mip_request().SerializeAsString(); + } else { + return {}; + } + + return std::vector(serialized.begin(), serialized.end()); + } + + std::string get_job_id(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return ""; } + return request.job_id(); + } + + int64_t get_frombyte(const std::vector& data) override + { + cuopt::remote::AsyncRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { return 0; } + return request.frombyte(); + } + + std::vector serialize_submit_response(bool success, const std::string& result) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::SUBMIT_JOB); + + auto* submit = response.mutable_submit_response(); + if (success) { + submit->set_status(cuopt::remote::SUCCESS); + submit->set_job_id(result); + submit->set_message("Job submitted successfully"); + } else { + submit->set_status(cuopt::remote::ERROR_INTERNAL); + submit->set_message(result); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_status_response(int status_code, + const std::string& message) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::CHECK_STATUS); + + auto* status = response.mutable_status_response(); + + switch (status_code) { + case 0: status->set_job_status(cuopt::remote::QUEUED); break; + case 1: status->set_job_status(cuopt::remote::PROCESSING); break; + case 2: status->set_job_status(cuopt::remote::COMPLETED); break; + case 3: status->set_job_status(cuopt::remote::FAILED); break; + case 4: + default: status->set_job_status(cuopt::remote::NOT_FOUND); break; + } + status->set_message(message); + + std::vector bytes(response.ByteSizeLong()); + 
response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::GET_RESULT); + + auto* result = response.mutable_result_response(); + + if (success) { + result->set_status(cuopt::remote::SUCCESS); + // The result_data is the raw serialized solution (LP or MIP) + // We need to wrap it properly. For simplicity, we'll embed it as raw bytes + // and the client will know to parse it based on the original request type. + // Actually, we should try to parse it and embed properly. + + // Try to parse as LP solution first + cuopt::remote::LPSolution lp_sol; + if (lp_sol.ParseFromArray(result_data.data(), result_data.size())) { + result->mutable_lp_solution()->CopyFrom(lp_sol); + } else { + // Try MIP solution + cuopt::remote::MIPSolution mip_sol; + if (mip_sol.ParseFromArray(result_data.data(), result_data.size())) { + result->mutable_mip_solution()->CopyFrom(mip_sol); + } + } + } else { + result->set_status(cuopt::remote::ERROR_INTERNAL); + result->set_error_message(error_message); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_delete_response(bool success) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::DELETE_RESULT); + + auto* del = response.mutable_delete_response(); + del->set_status(success ? cuopt::remote::SUCCESS : cuopt::remote::ERROR_NOT_FOUND); + del->set_message(success ? 
"Job deleted" : "Job not found"); + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + + std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t nbytes, + bool job_exists) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::GET_LOGS); + + auto* logs = response.mutable_logs_response(); + logs->set_status(job_exists ? cuopt::remote::SUCCESS : cuopt::remote::ERROR_NOT_FOUND); + logs->set_job_id(job_id); + logs->set_nbytes(nbytes); + logs->set_job_exists(job_exists); + + for (const auto& line : log_lines) { + logs->add_log_lines(line); + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + //============================================================================ // Metadata //============================================================================ diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu index d4e8df0b8..2f6ca52c8 100644 --- a/cpp/src/linear_programming/utilities/remote_solve.cu +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -16,6 +16,7 @@ #include #include +#include #include namespace cuopt::linear_programming { @@ -28,6 +29,13 @@ enum class MessageType : uint8_t { SOLUTION = 1, // Final solution data }; +// Check if sync mode is enabled (default is async) +static bool use_sync_mode() +{ + const char* sync_env = std::getenv("CUOPT_REMOTE_USE_SYNC"); + return (sync_env != nullptr && std::string(sync_env) == "1"); +} + /** * @brief Simple socket client for remote solve with streaming support */ @@ -68,7 +76,6 @@ class remote_client_t { return false; } - CUOPT_LOG_INFO("[remote_solve] Connected to {}:{}", host_, port_); return true; } @@ -215,15 +222,182 @@ class remote_client_t { int sockfd_; }; 
+//============================================================================ +// Async Mode Helpers +//============================================================================ + +template +static std::pair submit_job(const std::string& host, + int port, + const std::vector& request_data) +{ + remote_client_t client(host, port); + if (!client.connect()) { return {false, "Failed to connect to server"}; } + + if (!client.send_request(request_data)) { return {false, "Failed to send request"}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, "Failed to receive response"}; } + + auto serializer = get_serializer(); + std::string job_id; + std::string error_message; + if (!serializer->deserialize_submit_response(response_data, job_id, error_message)) { + return {false, error_message}; + } + + return {true, job_id}; +} + +/** + * @brief Retrieve and display buffered logs from the server. + * + * @param host Server host + * @param port Server port + * @param job_id Job ID + * @param frombyte Byte offset to start reading from + * @return std::pair - (job_exists, new_frombyte) + */ +template +static std::pair get_logs(const std::string& host, + int port, + const std::string& job_id, + int64_t frombyte) +{ + remote_client_t client(host, port); + if (!client.connect()) { return {false, frombyte}; } + + auto serializer = get_serializer(); + auto logs_request = serializer->serialize_get_logs_request(job_id, frombyte); + + if (!client.send_request(logs_request)) { return {false, frombyte}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, frombyte}; } + + auto result = serializer->deserialize_logs_response(response_data); + + // Print any new log lines + for (const auto& line : result.log_lines) { + std::cout << line << "\n"; + } + if (!result.log_lines.empty()) { std::cout.flush(); } + + return {result.job_exists, result.nbytes}; +} + +template +static bool 
poll_until_complete(const std::string& host, + int port, + const std::string& job_id, + bool verbose) +{ + auto serializer = get_serializer(); + using job_status_t = typename remote_serializer_t::job_status_t; + + int64_t log_frombyte = 0; // Track position in log file + + while (true) { + // Fetch and display any new log entries + if (verbose) { + auto [job_exists, new_frombyte] = get_logs(host, port, job_id, log_frombyte); + if (job_exists) { log_frombyte = new_frombyte; } + } + + remote_client_t client(host, port); + if (!client.connect()) { + CUOPT_LOG_ERROR("[remote_solve] Failed to connect for status check"); + return false; + } + + auto status_request = serializer->serialize_status_request(job_id); + if (!client.send_request(status_request)) { + CUOPT_LOG_ERROR("[remote_solve] Failed to send status request"); + return false; + } + + std::vector response_data; + if (!client.receive_response(response_data)) { + CUOPT_LOG_ERROR("[remote_solve] Failed to receive status response"); + return false; + } + + auto status = serializer->deserialize_status_response(response_data); + + if (status == job_status_t::COMPLETED) { + // Fetch any remaining log entries + if (verbose) { + get_logs(host, port, job_id, log_frombyte); + CUOPT_LOG_INFO("[remote_solve] Job {} completed", job_id); + } + return true; + } else if (status == job_status_t::FAILED) { + // Fetch any remaining log entries (may contain error info) + if (verbose) { get_logs(host, port, job_id, log_frombyte); } + CUOPT_LOG_ERROR("[remote_solve] Job {} failed", job_id); + return false; + } else if (status == job_status_t::NOT_FOUND) { + CUOPT_LOG_ERROR("[remote_solve] Job {} not found", job_id); + return false; + } + + // Job still queued or processing, wait and try again + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} + +template +static std::pair> get_result(const std::string& host, + int port, + const std::string& job_id) +{ + remote_client_t client(host, port); + if (!client.connect()) 
{ return {false, {}}; } + + auto serializer = get_serializer(); + auto result_request = serializer->serialize_get_result_request(job_id); + + if (!client.send_request(result_request)) { return {false, {}}; } + + std::vector response_data; + if (!client.receive_response(response_data)) { return {false, {}}; } + + return {true, response_data}; +} + +template +static void delete_job(const std::string& host, int port, const std::string& job_id) +{ + remote_client_t client(host, port); + if (!client.connect()) { return; } + + auto serializer = get_serializer(); + auto delete_request = serializer->serialize_delete_request(job_id); + + if (!client.send_request(delete_request)) { return; } + + std::vector response_data; + client.receive_response(response_data); // Ignore result +} + } // namespace +//============================================================================ +// LP Remote Solve +//============================================================================ + template optimization_problem_solution_t solve_lp_remote( const remote_solve_config_t& config, const cuopt::mps_parser::data_model_view_t& view, const pdlp_solver_settings_t& settings) { - CUOPT_LOG_INFO("[remote_solve] Solving LP remotely on {}:{}", config.host, config.port); + const bool sync_mode = use_sync_mode(); + + CUOPT_LOG_INFO("[remote_solve] Solving LP remotely on {}:{} ({} mode)", + config.host, + config.port, + sync_mode ? 
"sync" : "async"); // Log problem info (similar to local solve) if (settings.log_to_console) { @@ -240,50 +414,102 @@ optimization_problem_solution_t solve_lp_remote( auto serializer = get_serializer(); - // Serialize the request - std::vector request_data = serializer->serialize_lp_request(view, settings); - CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request: {} bytes", request_data.size()); + if (sync_mode) { + //========================================================================= + // SYNC MODE: Use blocking request with streaming logs + //========================================================================= + + // Serialize as async request with blocking=true + std::vector request_data = + serializer->serialize_async_lp_request(view, settings, true /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (sync): {} bytes", request_data.size()); + + // Connect and send + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); + } - // Connect and send - remote_client_t client(config.host, config.port); - if (!client.connect()) { - return optimization_problem_solution_t( - cuopt::logic_error("Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); - } + if (!client.send_request(request_data)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to send request to remote server", cuopt::error_type_t::RuntimeError)); + } - if (!client.send_request(request_data)) { - return optimization_problem_solution_t(cuopt::logic_error( - "Failed to send request to remote server", cuopt::error_type_t::RuntimeError)); - } + // Receive response with streaming log support + std::vector response_data; + if (!client.receive_streaming_response(response_data, settings.log_to_console)) { + return optimization_problem_solution_t(cuopt::logic_error( + "Failed to 
receive response from remote server", cuopt::error_type_t::RuntimeError)); + } - // Receive response with streaming log support - // Server sends LOG_MESSAGE types during solve, then SOLUTION at end - std::vector response_data; - if (!client.receive_streaming_response(response_data, settings.log_to_console)) { - return optimization_problem_solution_t(cuopt::logic_error( - "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); - } + CUOPT_LOG_DEBUG("[remote_solve] Received LP solution (sync): {} bytes", response_data.size()); - CUOPT_LOG_DEBUG("[remote_solve] Received LP solution: {} bytes", response_data.size()); + // Deserialize solution + return serializer->deserialize_lp_solution(response_data); - // Deserialize solution - auto solution = serializer->deserialize_lp_solution(response_data); + } else { + //========================================================================= + // ASYNC MODE: Submit → Poll → Get Result → Delete + //========================================================================= - // Note: Detailed logs were already streamed from server, no need to duplicate summary here + // Serialize as async request with blocking=false + std::vector request_data = + serializer->serialize_async_lp_request(view, settings, false /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (async): {} bytes", request_data.size()); - return solution; + // Submit job + auto [submit_ok, job_id_or_error] = + submit_job(config.host, config.port, request_data); + if (!submit_ok) { + return optimization_problem_solution_t(cuopt::logic_error( + "Job submission failed: " + job_id_or_error, cuopt::error_type_t::RuntimeError)); + } + std::string job_id = job_id_or_error; + CUOPT_LOG_INFO("[remote_solve] Job submitted, ID: {}", job_id); + + // Poll until complete + if (!poll_until_complete(config.host, config.port, job_id, settings.log_to_console)) { + delete_job(config.host, config.port, job_id); + return 
optimization_problem_solution_t( + cuopt::logic_error("Job failed or not found", cuopt::error_type_t::RuntimeError)); + } + + // Get result + auto [result_ok, result_data] = get_result(config.host, config.port, job_id); + if (!result_ok) { + delete_job(config.host, config.port, job_id); + return optimization_problem_solution_t( + cuopt::logic_error("Failed to retrieve result", cuopt::error_type_t::RuntimeError)); + } + + // Delete job from server + delete_job(config.host, config.port, job_id); + CUOPT_LOG_DEBUG("[remote_solve] Job {} deleted from server", job_id); + + // Deserialize solution from async result response + return serializer->deserialize_lp_result_response(result_data); + } } +//============================================================================ +// MIP Remote Solve +//============================================================================ + template mip_solution_t solve_mip_remote( const remote_solve_config_t& config, const cuopt::mps_parser::data_model_view_t& view, const mip_solver_settings_t& settings) { - CUOPT_LOG_INFO("[remote_solve] Solving MIP remotely on {}:{}", config.host, config.port); + const bool sync_mode = use_sync_mode(); - // Log problem info (similar to local solve) - // Note: MIP settings don't have log_to_console, so we always log + CUOPT_LOG_INFO("[remote_solve] Solving MIP remotely on {}:{} ({} mode)", + config.host, + config.port, + sync_mode ? "sync" : "async"); + + // Log problem info { auto n_rows = view.get_constraint_matrix_offsets().size() > 0 ? 
static_cast(view.get_constraint_matrix_offsets().size()) - 1 @@ -299,37 +525,77 @@ mip_solution_t solve_mip_remote( auto serializer = get_serializer(); - // Serialize the request - std::vector request_data = serializer->serialize_mip_request(view, settings); - CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request: {} bytes", request_data.size()); + if (sync_mode) { + //========================================================================= + // SYNC MODE: Use blocking request with streaming logs + //========================================================================= - // Connect and send - remote_client_t client(config.host, config.port); - if (!client.connect()) { - return mip_solution_t( - cuopt::logic_error("Failed to connect to remote server", cuopt::error_type_t::RuntimeError)); - } + std::vector request_data = + serializer->serialize_async_mip_request(view, settings, true /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (sync): {} bytes", request_data.size()); - if (!client.send_request(request_data)) { - return mip_solution_t(cuopt::logic_error("Failed to send request to remote server", - cuopt::error_type_t::RuntimeError)); - } + remote_client_t client(config.host, config.port); + if (!client.connect()) { + return mip_solution_t(cuopt::logic_error("Failed to connect to remote server", + cuopt::error_type_t::RuntimeError)); + } - // Receive response with streaming log support - std::vector response_data; - if (!client.receive_streaming_response(response_data, true /* log_to_console */)) { - return mip_solution_t(cuopt::logic_error( - "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); - } + if (!client.send_request(request_data)) { + return mip_solution_t(cuopt::logic_error("Failed to send request to remote server", + cuopt::error_type_t::RuntimeError)); + } - CUOPT_LOG_DEBUG("[remote_solve] Received MIP solution: {} bytes", response_data.size()); + std::vector response_data; + if 
(!client.receive_streaming_response(response_data, true /* log_to_console */)) { + return mip_solution_t(cuopt::logic_error( + "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); + } - // Deserialize solution - auto solution = serializer->deserialize_mip_solution(response_data); + CUOPT_LOG_DEBUG("[remote_solve] Received MIP solution (sync): {} bytes", response_data.size()); - // Note: Detailed logs were already streamed from server, no need to duplicate summary here + return serializer->deserialize_mip_solution(response_data); - return solution; + } else { + //========================================================================= + // ASYNC MODE: Submit → Poll → Get Result → Delete + //========================================================================= + + std::vector request_data = + serializer->serialize_async_mip_request(view, settings, false /* blocking */); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (async): {} bytes", request_data.size()); + + // Submit job + auto [submit_ok, job_id_or_error] = + submit_job(config.host, config.port, request_data); + if (!submit_ok) { + return mip_solution_t(cuopt::logic_error( + "Job submission failed: " + job_id_or_error, cuopt::error_type_t::RuntimeError)); + } + std::string job_id = job_id_or_error; + CUOPT_LOG_INFO("[remote_solve] Job submitted, ID: {}", job_id); + + // Poll until complete + if (!poll_until_complete(config.host, config.port, job_id, true /* verbose */)) { + delete_job(config.host, config.port, job_id); + return mip_solution_t( + cuopt::logic_error("Job failed or not found", cuopt::error_type_t::RuntimeError)); + } + + // Get result + auto [result_ok, result_data] = get_result(config.host, config.port, job_id); + if (!result_ok) { + delete_job(config.host, config.port, job_id); + return mip_solution_t( + cuopt::logic_error("Failed to retrieve result", cuopt::error_type_t::RuntimeError)); + } + + // Delete job from server + delete_job(config.host, 
config.port, job_id); + CUOPT_LOG_DEBUG("[remote_solve] Job {} deleted from server", job_id); + + // Deserialize solution from async result response + return serializer->deserialize_mip_result_response(result_data); + } } // Explicit instantiations From 44b16f0182b6fe9eecc67704423c5c56e8479253 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Wed, 7 Jan 2026 14:30:04 -0500 Subject: [PATCH 14/37] Add worker monitoring and msgpack async support Server (cuopt_remote_server.cpp): - Add worker process monitoring with waitpid - Automatically restart dead workers - Mark in-progress jobs as FAILED when worker dies - Add flush calls to monitoring log messages Msgpack serializer: - Implement full async interface support - Add async message types (MSG_ASYNC_LP/MIP_REQUEST, etc.) - Implement server-side methods: is_async_request, is_blocking_request, extract_problem_data, get_async_request_type, get_job_id, get_frombyte - Implement response serialization: serialize_submit_response, serialize_status_response, serialize_result_response, serialize_delete_response, serialize_logs_response - Fix is_mip_request to detect both sync and async MIP formats Tested with LP (afiro.mps) and MIP (flugpl.mps) using msgpack async. 
--- cpp/cuopt_remote_server.cpp | 150 ++++++- .../serializers/msgpack_serializer.cpp | 390 +++++++++++++++++- 2 files changed, 519 insertions(+), 21 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 8ec84364a..990853fce 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -68,8 +68,9 @@ struct JobQueueEntry { uint32_t problem_type; // 0 = LP, 1 = MIP uint32_t data_size; uint8_t data[MAX_JOB_DATA_SIZE]; - std::atomic ready; // Job is ready to be processed - std::atomic claimed; // Worker has claimed this job + std::atomic ready; // Job is ready to be processed + std::atomic claimed; // Worker has claimed this job + std::atomic worker_pid; // PID of worker that claimed this job (0 if none) }; struct ResultQueueEntry { @@ -363,8 +364,9 @@ bool init_shared_memory() // Initialize job queue entries for (size_t i = 0; i < MAX_JOBS; ++i) { - job_queue[i].ready = false; - job_queue[i].claimed = false; + job_queue[i].ready = false; + job_queue[i].claimed = false; + job_queue[i].worker_pid = 0; } // Create result queue shared memory @@ -466,7 +468,8 @@ void worker_process(int worker_id) // Try to claim this job atomically bool expected = false; if (job_queue[i].claimed.compare_exchange_strong(expected, true)) { - job_slot = i; + job_queue[i].worker_pid = getpid(); // Record our PID + job_slot = i; break; } } @@ -558,8 +561,9 @@ void worker_process(int worker_id) } // Clear job slot - job.ready = false; - job.claimed = false; + job.worker_pid = 0; + job.ready = false; + job.claimed = false; std::cout << "[Worker " << worker_id << "] Completed job: " << job_id << " (success: " << success << ")\n"; @@ -596,6 +600,127 @@ void wait_for_workers() worker_pids.clear(); } +// Spawn a single replacement worker and return its PID +pid_t spawn_single_worker(int worker_id) +{ + pid_t pid = fork(); + if (pid < 0) { + std::cerr << "[Server] Failed to fork replacement worker " << worker_id << "\n"; + return -1; + } else if (pid 
== 0) { + // Child process + worker_process(worker_id); + _exit(0); // Should not reach here + } + return pid; +} + +// Mark jobs being processed by a dead worker as failed +void mark_worker_jobs_failed(pid_t dead_worker_pid) +{ + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && job_queue[i].claimed && job_queue[i].worker_pid == dead_worker_pid) { + std::string job_id(job_queue[i].job_id); + std::cerr << "[Server] Worker " << dead_worker_pid << " died while processing job: " << job_id + << "\n"; + + // Store failure result in result queue + for (size_t j = 0; j < MAX_RESULTS; ++j) { + if (!result_queue[j].ready) { + strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1); + result_queue[j].status = 1; // Error + result_queue[j].data_size = 0; + strncpy(result_queue[j].error_message, + "Worker process died unexpectedly", + sizeof(result_queue[j].error_message) - 1); + result_queue[j].retrieved = false; + result_queue[j].ready = true; + break; + } + } + + // Clear the job slot + job_queue[i].worker_pid = 0; + job_queue[i].ready = false; + job_queue[i].claimed = false; + + // Update job tracker + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { + it->second.status = JobStatus::FAILED; + it->second.error_message = "Worker process died unexpectedly"; + } + } + } + } +} + +// Worker monitor thread - detects dead workers and restarts them +void worker_monitor_thread() +{ + std::cout << "[Server] Worker monitor thread started\n"; + std::cout.flush(); + + while (keep_running) { + // Check all worker PIDs for dead workers + for (size_t i = 0; i < worker_pids.size(); ++i) { + pid_t pid = worker_pids[i]; + if (pid <= 0) continue; + + int status; + pid_t result = waitpid(pid, &status, WNOHANG); + + if (result == pid) { + // Worker has exited + int exit_code = WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + bool signaled = WIFSIGNALED(status); + int signal_num = signaled ? WTERMSIG(status) : 0; + + if (signaled) { + std::cerr << "[Server] Worker " << pid << " killed by signal " << signal_num << "\n"; + std::cerr.flush(); + } else if (exit_code != 0) { + std::cerr << "[Server] Worker " << pid << " exited with code " << exit_code << "\n"; + std::cerr.flush(); + } else { + // Clean exit during shutdown - don't restart + if (shm_ctrl && shm_ctrl->shutdown_requested) { + worker_pids[i] = 0; + continue; + } + std::cerr << "[Server] Worker " << pid << " exited unexpectedly\n"; + std::cerr.flush(); + } + + // Mark any jobs this worker was processing as failed + mark_worker_jobs_failed(pid); + + // Spawn replacement worker (unless shutting down) + if (keep_running && shm_ctrl && !shm_ctrl->shutdown_requested) { + pid_t new_pid = spawn_single_worker(static_cast(i)); + if (new_pid > 0) { + worker_pids[i] = new_pid; + std::cout << "[Server] Restarted worker " << i << " with PID " << new_pid << "\n"; + std::cout.flush(); + } else { + worker_pids[i] = 0; // Failed to restart + } + } else { + worker_pids[i] = 0; + } + } + } + + // Check every 100ms + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + std::cout << "[Server] Worker monitor thread stopped\n"; + std::cout.flush(); +} + // ============================================================================ // Result Retrieval Thread (main process) // ============================================================================ @@ -687,8 +812,9 @@ std::pair submit_job_async(const std::vector& reques job_queue[i].problem_type = is_mip ? 
1 : 0; job_queue[i].data_size = request_data.size(); memcpy(job_queue[i].data, request_data.data(), request_data.size()); - job_queue[i].claimed = false; - job_queue[i].ready = true; // Mark as ready last + job_queue[i].worker_pid = 0; + job_queue[i].claimed = false; + job_queue[i].ready = true; // Mark as ready last // Track job { @@ -1095,6 +1221,9 @@ int main(int argc, char** argv) // Start result retrieval thread std::thread result_thread(result_retrieval_thread); + // Start worker monitor thread (detects dead workers and restarts them) + std::thread monitor_thread(worker_monitor_thread); + // Create server socket int server_fd = socket(AF_INET, SOCK_STREAM, 0); if (server_fd < 0) { @@ -1180,6 +1309,9 @@ int main(int argc, char** argv) result_cv.notify_all(); if (result_thread.joinable()) { result_thread.join(); } + // Wait for worker monitor thread + if (monitor_thread.joinable()) { monitor_thread.join(); } + // Wait for workers wait_for_workers(); diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index 56084fa24..e3e1a0334 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -21,12 +21,24 @@ namespace cuopt::linear_programming { -// Message type identifiers +// Message type identifiers for sync mode constexpr uint8_t MSG_LP_REQUEST = 1; constexpr uint8_t MSG_MIP_REQUEST = 2; constexpr uint8_t MSG_LP_SOLUTION = 3; constexpr uint8_t MSG_MIP_SOLUTION = 4; +// Message type identifiers for async mode +constexpr uint8_t MSG_ASYNC_LP_REQUEST = 10; +constexpr uint8_t MSG_ASYNC_MIP_REQUEST = 11; +constexpr uint8_t MSG_STATUS_REQUEST = 12; +constexpr uint8_t MSG_GET_RESULT = 13; +constexpr uint8_t MSG_DELETE_REQUEST = 14; +constexpr uint8_t MSG_GET_LOGS = 15; + +constexpr uint8_t MSG_SUBMIT_RESPONSE = 20; +constexpr uint8_t MSG_STATUS_RESPONSE = 21; 
+constexpr uint8_t MSG_LOGS_RESPONSE = 22; + template class msgpack_serializer_t : public remote_serializer_t { public: @@ -183,7 +195,9 @@ class msgpack_serializer_t : public remote_serializer_t { msgpack::object_handle oh = msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { - return oh.get().as() == MSG_MIP_REQUEST; + uint8_t msg_type = oh.get().as(); + // Check both sync and async MIP request types + return msg_type == MSG_MIP_REQUEST || msg_type == MSG_ASYNC_MIP_REQUEST; } } catch (...) { } @@ -331,7 +345,7 @@ class msgpack_serializer_t : public remote_serializer_t { } //============================================================================ - // Async Operations (simplified for testing) + // Async Operations //============================================================================ std::vector serialize_async_lp_request( @@ -339,7 +353,31 @@ class msgpack_serializer_t : public remote_serializer_t { const pdlp_solver_settings_t& settings, bool blocking) override { - return serialize_lp_request(view, settings); + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Header: msg_type, blocking flag, job_id (empty for new submission) + pk.pack_uint8(MSG_ASYNC_LP_REQUEST); + pk.pack(blocking); + pk.pack(std::string("")); // job_id empty for new submission + pk.pack(int64_t(0)); // frombyte (unused for LP requests) + + // Pack the problem and settings + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + // Pack LP settings + pk.pack_map(4); + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("iteration_limit"); + pk.pack(static_cast(settings.iteration_limit)); + pk.pack("abs_gap_tol"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("rel_gap_tol"); + pk.pack(settings.tolerances.relative_gap_tolerance); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); } std::vector serialize_async_mip_request( @@ -347,41 
+385,168 @@ class msgpack_serializer_t : public remote_serializer_t { const mip_solver_settings_t& settings, bool blocking) override { - return serialize_mip_request(view, settings); + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + + // Header: msg_type, blocking flag, job_id (empty for new submission) + pk.pack_uint8(MSG_ASYNC_MIP_REQUEST); + pk.pack(blocking); + pk.pack(std::string("")); // job_id empty for new submission + pk.pack(int64_t(0)); // frombyte (unused for MIP requests) + + // Pack the problem and settings + pk.pack_uint32(protocol_version()); + pack_problem(pk, view); + + pk.pack_map(2); + pk.pack("time_limit"); + pk.pack(settings.time_limit); + pk.pack("mip_gap"); + pk.pack(settings.tolerances.relative_mip_gap); + + return std::vector(buffer.data(), buffer.data() + buffer.size()); } std::vector serialize_status_request(const std::string& job_id) override { msgpack::sbuffer buffer; msgpack::packer pk(&buffer); - pk.pack_map(1); - pk.pack("job_id"); + pk.pack_uint8(MSG_STATUS_REQUEST); + pk.pack(false); // blocking (unused) pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) return std::vector(buffer.data(), buffer.data() + buffer.size()); } std::vector serialize_get_result_request(const std::string& job_id) override { - return serialize_status_request(job_id); + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_GET_RESULT); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); } std::vector serialize_delete_request(const std::string& job_id) override { - return serialize_status_request(job_id); + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_DELETE_REQUEST); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector 
serialize_get_logs_request(const std::string& job_id, + int64_t frombyte = 0) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_GET_LOGS); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(frombyte); + return std::vector(buffer.data(), buffer.data() + buffer.size()); } bool deserialize_submit_response(const std::vector& data, std::string& job_id, std::string& error_message) override { - job_id = "msgpack_job"; - return true; + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_SUBMIT_RESPONSE) { + error_message = "Invalid response type"; + return false; + } + + msgpack::object_handle oh_success = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + bool success = oh_success.get().as(); + + msgpack::object_handle oh_job_id = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + job_id = oh_job_id.get().as(); + + if (!success) { + msgpack::object_handle oh_err = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + error_message = oh_err.get().as(); + } + + return success; + } catch (const std::exception& e) { + error_message = std::string("MsgPack parse error: ") + e.what(); + return false; + } } typename remote_serializer_t::job_status_t deserialize_status_response( const std::vector& data) override { - return remote_serializer_t::job_status_t::COMPLETED; + using job_status_t = typename remote_serializer_t::job_status_t; + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_STATUS_RESPONSE) { return job_status_t::NOT_FOUND; } + + msgpack::object_handle oh_status = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + int status = 
oh_status.get().as(); + + // Status codes: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND + switch (status) { + case 0: return job_status_t::QUEUED; + case 1: return job_status_t::PROCESSING; + case 2: return job_status_t::COMPLETED; + case 3: return job_status_t::FAILED; + default: return job_status_t::NOT_FOUND; + } + } catch (...) { + return job_status_t::NOT_FOUND; + } + } + + typename remote_serializer_t::logs_result_t deserialize_logs_response( + const std::vector& data) override + { + typename remote_serializer_t::logs_result_t result; + result.nbytes = 0; + result.job_exists = false; + + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_LOGS_RESPONSE) { return result; } + + msgpack::object_handle oh_exists = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.job_exists = oh_exists.get().as(); + + msgpack::object_handle oh_nbytes = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.nbytes = oh_nbytes.get().as(); + + msgpack::object_handle oh_lines = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + auto lines_array = oh_lines.get().as>(); + result.log_lines = std::move(lines_array); + } catch (...) 
{ + } + return result; } optimization_problem_solution_t deserialize_lp_result_response( @@ -396,6 +561,207 @@ class msgpack_serializer_t : public remote_serializer_t { return deserialize_mip_solution(data); } + //============================================================================ + // Server-side async request detection + //============================================================================ + + bool is_async_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { + uint8_t msg_type = oh.get().as(); + return msg_type >= MSG_ASYNC_LP_REQUEST && msg_type <= MSG_GET_LOGS; + } + } catch (...) { + } + return false; + } + + bool is_blocking_request(const std::vector& data) override + { + if (data.empty()) return false; + try { + size_t offset = 0; + // Skip msg_type + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + + // Read blocking flag + msgpack::object_handle oh_blocking = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_blocking.get().as(); + } catch (...) { + } + return false; + } + + std::vector extract_problem_data(const std::vector& data) override + { + // For msgpack, we extract the problem portion by repacking + // The full request contains: msg_type, blocking, job_id, frombyte, version, problem, settings + // We need to return a sync-style request: msg_type, version, problem, settings + if (data.empty()) return {}; + + try { + size_t offset = 0; + + // Read header + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + // Map async type to sync type + uint8_t sync_type = (msg_type == MSG_ASYNC_MIP_REQUEST) ? 
MSG_MIP_REQUEST : MSG_LP_REQUEST; + + // Skip blocking, job_id, frombyte + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // job_id + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); // frombyte + + // Create sync-style request: msg_type, then rest of data (version, problem, settings) + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(sync_type); + + // Append the rest of the data (version, problem, settings) + std::vector result(buffer.data(), buffer.data() + buffer.size()); + result.insert(result.end(), data.begin() + offset, data.end()); + return result; + + } catch (...) { + } + return {}; + } + + int64_t get_frombyte(const std::vector& data) override + { + if (data.empty()) return 0; + try { + size_t offset = 0; + // Skip msg_type + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip job_id + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Read frombyte + msgpack::object_handle oh_frombyte = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_frombyte.get().as(); + } catch (...) 
{ + } + return 0; + } + + int get_async_request_type(const std::vector& data) override + { + if (data.empty()) return -1; + try { + size_t offset = 0; + msgpack::object_handle oh = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh.get().as(); + + // Map msgpack types to the RequestType enum expected by server + switch (msg_type) { + case MSG_ASYNC_LP_REQUEST: + case MSG_ASYNC_MIP_REQUEST: return 0; // SUBMIT_JOB + case MSG_STATUS_REQUEST: return 1; // CHECK_STATUS + case MSG_GET_RESULT: return 2; // GET_RESULT + case MSG_DELETE_REQUEST: return 3; // DELETE_RESULT + case MSG_GET_LOGS: return 4; // GET_LOGS + default: return -1; + } + } catch (...) { + } + return -1; + } + + std::string get_job_id(const std::vector& data) override + { + if (data.empty()) return ""; + try { + size_t offset = 0; + // Skip msg_type + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Skip blocking + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + // Read job_id + msgpack::object_handle oh_job_id = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + return oh_job_id.get().as(); + } catch (...) 
{ + } + return ""; + } + + //============================================================================ + // Server-side response serialization + //============================================================================ + + std::vector serialize_submit_response(bool success, const std::string& result) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_SUBMIT_RESPONSE); + pk.pack(success); + pk.pack(result); // job_id on success, error message on failure + if (!success) { pk.pack(result); } // error message duplicated for compatibility + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_status_response(int status_code, + const std::string& message) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_STATUS_RESPONSE); + pk.pack(status_code); + pk.pack(message); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_result_response(bool success, + const std::vector& result_data, + const std::string& error_message) override + { + // For result response, we prepend success flag then the actual solution data + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack(success); + pk.pack(error_message); + // Append raw solution data + std::vector response(buffer.data(), buffer.data() + buffer.size()); + response.insert(response.end(), result_data.begin(), result_data.end()); + return response; + } + + std::vector serialize_delete_response(bool success) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack(success); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + + std::vector serialize_logs_response(const std::string& job_id, + const std::vector& log_lines, + int64_t nbytes, + bool job_exists) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_LOGS_RESPONSE); + pk.pack(job_exists); + pk.pack(nbytes); 
+ pk.pack(log_lines); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + private: void pack_problem(msgpack::packer& pk, const mps_parser::data_model_view_t& view) From a03b00dc91d4e40836a67dca3b7f50fbf95c2e83 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Wed, 7 Jan 2026 16:28:20 -0500 Subject: [PATCH 15/37] Add remote solve and memory architecture documentation - docs/developer/REMOTE_SOLVE_GUIDE.md: Comprehensive guide covering server setup, client configuration, async/sync modes, job lifecycle, custom serialization, worker monitoring, and troubleshooting - docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md: Documents the hybrid GPU/CPU memory strategy for solutions, including to_host() method, accessor patterns, and design rationale Deleted 56 obsolete markdown files from root directory that contained outdated information from previous development iterations. --- docs/developer/REMOTE_SOLVE_GUIDE.md | 198 +++++++++++++++ .../developer/SOLUTION_MEMORY_ARCHITECTURE.md | 236 ++++++++++++++++++ 2 files changed, 434 insertions(+) create mode 100644 docs/developer/REMOTE_SOLVE_GUIDE.md create mode 100644 docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md diff --git a/docs/developer/REMOTE_SOLVE_GUIDE.md b/docs/developer/REMOTE_SOLVE_GUIDE.md new file mode 100644 index 000000000..eec753e91 --- /dev/null +++ b/docs/developer/REMOTE_SOLVE_GUIDE.md @@ -0,0 +1,198 @@ +# cuOpt Remote Solve Guide + +This guide covers the remote solve feature for cuOpt, enabling GPU-accelerated optimization from CPU-only client machines. 
+ +## Overview + +The remote solve feature allows: +- **CPU-only clients** to solve LP/MIP problems using a GPU-equipped server +- **Async job management** for non-blocking operations +- **Pluggable serialization** (default: Protocol Buffers, also supports MsgPack) +- **Real-time log streaming** from solver to client +- **Worker process isolation** with automatic restart on failure + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ CLIENT │ +│ - Submits job → receives job ID │ +│ - Polls for status (QUEUED/PROCESSING/COMPLETED/FAILED) │ +│ - Retrieves logs incrementally via GET_LOGS │ +│ - Retrieves result → gets solution │ +│ - Deletes result → frees server memory │ +└────────────────────┬────────────────────────────────────────┘ + │ TCP/IP (Protobuf or custom serializer) + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ ASYNC SERVER (Main Process) │ +│ - Accepts connections (default port 8765) │ +│ - Handles: SUBMIT_JOB, CHECK_STATUS, GET_RESULT, │ +│ DELETE_RESULT, GET_LOGS │ +│ - Shared memory job queue │ +│ - Worker monitor thread (restarts dead workers) │ +│ - Result retrieval thread │ +└──────┬──────────────────────────────────┬──────────────────┘ + │ Shared Memory │ + ▼ ▼ +┌─────────────────────────────────────────────────────────────┐ +│ WORKER PROCESS(es) │ +│ - Reads jobs from shared memory queue │ +│ - Logs to per-job file (/tmp/cuopt_logs/log_{job_id}) │ +│ - Solves using GPU (cuOpt library) │ +│ - Writes results to shared memory result queue │ +│ - Isolated - crash doesn't affect server │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Quick Start + +### Building + +```bash +# Build with remote server support +./build.sh libcuopt cuopt_remote_server +``` + +### Running the Server + +```bash +# Start server with 4 worker processes on port 8765 +./cpp/build/cuopt_remote_server -p 8765 -w 4 +``` + +Server options: +- `-p PORT` - Port number (default: 8765) +- `-w 
NUM` - Number of worker processes (default: 4) + +### Client Configuration + +Set environment variables to enable remote solve: + +```bash +export CUOPT_REMOTE_HOST=server.example.com +export CUOPT_REMOTE_PORT=8765 + +# Optional: Use sync mode (blocking with log streaming) +export CUOPT_REMOTE_USE_SYNC=1 +``` + +### Using cuopt_cli + +```bash +# Remote solve (async mode by default) +CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 \ + ./cpp/build/cuopt_cli problem.mps + +# Remote solve with log streaming (sync mode) +CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + ./cpp/build/cuopt_cli --log-to-console 1 problem.mps +``` + +### Python Usage + +```python +import os +os.environ['CUOPT_REMOTE_HOST'] = 'localhost' +os.environ['CUOPT_REMOTE_PORT'] = '8765' + +from cuopt.linear_programming import DataModel, SolverSettings, solve_lp + +# Create problem +dm = DataModel() +dm.set_csr_constraint_matrix(...) +# ... set up problem ... + +# Solve remotely (transparent to user) +solution = solve_lp(dm, SolverSettings()) +print(f"Objective: {solution.get_objective_value()}") +``` + +## Operating Modes + +### Sync Mode (`CUOPT_REMOTE_USE_SYNC=1`) +- Client sends request and waits for result +- Server streams solver logs in real-time +- Best for interactive use + +### Async Mode (default) +- Client submits job, receives job_id immediately +- Client polls for status and logs incrementally +- Client retrieves result when complete +- Best for batch processing, long-running jobs + +## Job Lifecycle (Async Mode) + +1. **SUBMIT_JOB** → Returns `job_id` +2. **CHECK_STATUS** → Returns QUEUED | PROCESSING | COMPLETED | FAILED +3. **GET_LOGS** → Returns log lines from `frombyte` offset +4. **GET_RESULT** → Returns serialized solution +5. **DELETE_RESULT** → Removes job from server + +## Custom Serialization + +The default serializer uses Protocol Buffers. 
You can provide a custom serializer: + +```bash +# Set custom serializer library +export CUOPT_SERIALIZER_LIB=/path/to/libcustom_serializer.so + +# Run server and client with same serializer +CUOPT_SERIALIZER_LIB=... ./cpp/build/cuopt_remote_server -p 8765 -w 2 +CUOPT_SERIALIZER_LIB=... ./cpp/build/cuopt_cli problem.mps +``` + +See `docs/developer/SERIALIZATION_PLUGIN_GUIDE.md` for implementation details. + +## Worker Monitoring + +The server automatically monitors worker processes: +- Detects worker death via `waitpid` +- Automatically restarts dead workers +- Marks in-progress jobs as FAILED if worker dies +- Logs worker lifecycle events + +## Files and Components + +| Component | Location | +|-----------|----------| +| Server executable | `cpp/cuopt_remote_server.cpp` | +| Client logic | `cpp/src/linear_programming/utilities/remote_solve.cu` | +| Serialization interface | `cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp` | +| Protobuf serializer | `cpp/src/linear_programming/utilities/protobuf_serializer.cu` | +| Protobuf schema | `cpp/src/linear_programming/utilities/cuopt_remote.proto` | +| MsgPack serializer (example) | `cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp` | + +## Troubleshooting + +### Library Loading Issues (Development) + +If you're developing and the wrong `libcuopt.so` is being loaded: + +```bash +# Use LD_PRELOAD to force loading local build +LD_PRELOAD=cpp/build/libcuopt.so ./cpp/build/cuopt_cli problem.mps +``` + +For production, use `build.sh` which sets up proper RPATH. + +### Server Not Responding + +1. Check server is running: `pgrep -af cuopt_remote_server` +2. Check port is listening: `ss -tlnp | grep 8765` +3. 
Check firewall allows connections + +### Job Stuck in PROCESSING + +- Worker may have crashed - check server logs +- Server will mark job as FAILED after detecting worker death + +## Environment Variables Reference + +| Variable | Description | Default | +|----------|-------------|---------| +| `CUOPT_REMOTE_HOST` | Server hostname/IP | (none - local solve) | +| `CUOPT_REMOTE_PORT` | Server port | (none - local solve) | +| `CUOPT_REMOTE_USE_SYNC` | Use sync mode if "1" | "0" (async) | +| `CUOPT_SERIALIZER_LIB` | Path to custom serializer | (uses protobuf) | diff --git a/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md new file mode 100644 index 000000000..33553ef5c --- /dev/null +++ b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md @@ -0,0 +1,236 @@ +# cuOpt Solution Memory Architecture + +This document describes how cuOpt manages solution data memory for both local GPU-based solving and remote CPU-only solving. + +## Overview + +cuOpt solutions can exist in either GPU memory (for local high-performance workflows) or CPU memory (for remote solve and CPU-only clients). The architecture supports both use cases efficiently. 
+ +## Solution Classes + +### LP Solution: `optimization_problem_solution_t` + +Located in: `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp` + +**Key Data Members:** +```cpp +// GPU memory (primary storage for local solve) +rmm::device_uvector primal_solution_; +rmm::device_uvector dual_solution_; +rmm::device_uvector reduced_cost_; + +// CPU memory (used for remote solve or explicit host access) +std::vector primal_solution_host_; +std::vector dual_solution_host_; +std::vector reduced_cost_host_; + +// Scalars (always on host) +f_t objective_value_; +f_t dual_objective_value_; +f_t l2_primal_residual_; +f_t l2_dual_residual_; +f_t gap_; +i_t nb_iterations_; +f_t solve_time_; +pdlp_termination_status_t termination_status_; +error_type_t error_status_; +``` + +### MIP Solution: `mip_solution_t` + +Located in: `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp` + +**Key Data Members:** +```cpp +// GPU memory (primary storage for local solve) +rmm::device_uvector solution_; +std::vector> solution_pool_; + +// CPU memory (used for remote solve) +std::vector solution_host_; +std::vector> solution_pool_host_; + +// Scalars (always on host) +f_t objective_; +f_t mip_gap_; +f_t max_constraint_violation_; +f_t max_int_violation_; +f_t max_variable_bound_violation_; +mip_termination_status_t termination_status_; +error_type_t error_status_; +``` + +## Memory Management Strategy + +### Local Solve (GPU) + +When solving locally on a GPU: + +1. **Solver computes** → Results in GPU memory (`device_uvector`) +2. **Solution returned** → Contains GPU buffers +3. **User accesses** → Can work directly with GPU data or copy to host as needed + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Solver │ ──► │ Solution │ ──► │ User │ +│ (GPU) │ │ (GPU mem) │ │ (GPU/CPU) │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +### Remote Solve (CPU-only client) + +When solving remotely from a CPU-only machine: + +1. 
**Client sends** → Problem data serialized and sent to server +2. **Server solves** → Results computed on GPU +3. **`to_host()` called** → GPU data copied to CPU memory +4. **Solution serialized** → CPU data sent back to client +5. **Client receives** → Solution with CPU memory only + +``` +┌──────────┐ ┌──────────────────────────────────────────┐ ┌──────────┐ +│ Client │ ──► │ SERVER │ ◄── │ Client │ +│ (no GPU) │ │ GPU solve → to_host() → serialize │ │(solution)│ +└──────────┘ └──────────────────────────────────────────┘ └──────────┘ +``` + +## The `to_host()` Method + +Both solution classes provide a `to_host()` method that copies GPU data to CPU: + +```cpp +// LP Solution +void optimization_problem_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (primal_solution_.size() > 0) { + primal_solution_host_.resize(primal_solution_.size()); + raft::copy(primal_solution_host_.data(), primal_solution_.data(), + primal_solution_.size(), stream_view); + } + // ... similar for dual_solution_, reduced_cost_ + stream_view.synchronize(); +} + +// MIP Solution +void mip_solution_t::to_host(rmm::cuda_stream_view stream_view) +{ + if (solution_.size() > 0) { + solution_host_.resize(solution_.size()); + raft::copy(solution_host_.data(), solution_.data(), + solution_.size(), stream_view); + } + // ... similar for solution_pool_ + stream_view.synchronize(); +} +``` + +### When to Call `to_host()` + +- **Server-side remote solve**: Called before serializing solution for network transmission +- **Client accessing host data**: If user needs `std::vector` access to solution data +- **Writing to files**: When saving solutions to disk + +### Performance Considerations + +The `to_host()` copy adds overhead, but: +- Only called when CPU access is actually needed +- GPU computation dominates solve time for non-trivial problems +- One-time cost after solve completes + +**Typical overhead**: Negligible for problems with thousands of variables. 
For a 10,000-variable problem, copying ~80KB takes <1ms. + +## Accessor Methods + +### GPU Accessors (for local solve) + +```cpp +// LP +const rmm::device_uvector& get_primal_solution() const; +const rmm::device_uvector& get_dual_solution() const; +const rmm::device_uvector& get_reduced_cost() const; + +// MIP +const rmm::device_uvector& get_solution() const; +``` + +### CPU Accessors (for remote solve) + +```cpp +// LP +const std::vector& get_primal_solution_host() const; +const std::vector& get_dual_solution_host() const; +const std::vector& get_reduced_cost_host() const; + +// MIP +const std::vector& get_solution_host() const; +``` + +### Checking Memory Location + +```cpp +// Returns true if solution data is on GPU +bool is_device_memory() const; +``` + +## Usage in Remote Solve Server + +The server calls `to_host()` before serialization: + +```cpp +// In cuopt_remote_server.cpp +if (is_mip) { + mip_solution_t solution = solve_mip(...); + solution.to_host(stream); // Copy GPU → CPU + result_data = serializer->serialize_mip_solution(solution); +} else { + optimization_problem_solution_t solution = solve_lp(...); + solution.to_host(stream); // Copy GPU → CPU + result_data = serializer->serialize_lp_solution(solution); +} +``` + +## Design Rationale + +### Why Not Pure CPU Memory? + +An earlier design considered using only `std::vector` for solutions. We chose the hybrid approach because: + +1. **GPU performance**: Local solves benefit from keeping data on GPU +2. **Minimize changes**: Existing GPU-based code continues to work unchanged +3. **Flexibility**: Users can choose GPU or CPU access as needed + +### Why Not Pure GPU Memory? + +Pure GPU memory would fail for: + +1. **Remote solve**: CPU-only clients need CPU data +2. **Serialization**: Network transmission requires CPU memory +3. 
**File I/O**: Writing to disk typically uses CPU + +### Hybrid Approach Benefits + +- ✅ Local GPU workflows remain efficient +- ✅ Remote solve works with CPU-only clients +- ✅ Minimal code changes to existing solvers +- ✅ On-demand copy (only when needed) +- ✅ Clear separation of concerns + +## Files Involved + +| File | Description | +|------|-------------| +| `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp` | LP solution class declaration | +| `cpp/src/linear_programming/solver_solution.cu` | LP solution implementation + `to_host()` | +| `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp` | MIP solution class declaration | +| `cpp/src/mip/solver_solution.cu` | MIP solution implementation + `to_host()` | +| `cpp/cuopt_remote_server.cpp` | Server calls `to_host()` before serialization | +| `cpp/src/linear_programming/utilities/protobuf_serializer.cu` | Uses host accessors for serialization | + +## Summary + +The cuOpt solution memory architecture uses a **hybrid GPU/CPU approach**: + +1. **Primary storage**: GPU (`device_uvector`) for local solve performance +2. **Secondary storage**: CPU (`std::vector`) for remote solve and host access +3. **On-demand copying**: `to_host()` method copies GPU → CPU when needed +4. 
**Transparent to users**: Local users get GPU data, remote users get CPU data automatically From 729f55f3e473718351c82a73db387f97da385412 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Thu, 8 Jan 2026 15:49:48 -0500 Subject: [PATCH 16/37] Add job cancellation and WAIT_FOR_RESULT to remote server Features: - CANCEL_JOB: Cancel queued jobs (marked cancelled, workers skip them) or running jobs (worker killed via SIGKILL, auto-restarted) - WAIT_FOR_RESULT: Blocking wait using condition variables (no polling) Server thread sleeps until job completes, fails, or is cancelled - cuopt_cancel_job: Standalone CLI utility to cancel jobs by ID Protocol changes: - Added CANCEL_JOB (type 5) and WAIT_FOR_RESULT (type 6) request types - Added CANCELLED (5) to JobStatus enum - Added CancelResponse message to protobuf schema Server changes: - Added cancelled flag to shared memory JobQueueEntry - Workers check cancelled flag before processing claimed jobs - Cancel handler kills worker process for running jobs - wait_for_result() uses JobWaiter condition variables - Fixed check_job_status() to detect PROCESSING state - Thread-per-connection model (already present) enables blocking waits Serialization: - Added cancel methods to remote_serialization.hpp base class - Implemented in both protobuf and msgpack serializers - Added MSG_WAIT_REQUEST (17) to msgpack message types Documentation: - Added docs/remote_solve_architecture.md for security review - Covers protocol, process model, data flow, security considerations --- cpp/CMakeLists.txt | 32 +- cpp/cuopt_cancel_job.cpp | 193 ++++++ cpp/cuopt_remote_server.cpp | 331 ++++++++++- .../utilities/remote_serialization.hpp | 43 +- .../utilities/cuopt_remote.proto | 11 + .../utilities/protobuf_serializer.cu | 73 ++- .../serializers/msgpack_serializer.cpp | 78 ++- docs/remote_solve_architecture.md | 556 ++++++++++++++++++ 8 files changed, 1275 insertions(+), 42 deletions(-) create mode 100644 cpp/cuopt_cancel_job.cpp create mode 100644 
docs/remote_solve_architecture.md diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 67468534f..dddc640ea 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -588,42 +588,28 @@ if(BUILD_REMOTE_SERVER AND NOT BUILD_LP_ONLY) RUNTIME DESTINATION ${_BIN_DEST} ) - # Standalone remote solve test client - add_executable(remote_solve_test tests/linear_programming/remote_solve_test.cpp) - target_compile_options(remote_solve_test + # cuopt_cancel_job - standalone utility to cancel jobs + add_executable(cuopt_cancel_job cuopt_cancel_job.cpp) + target_compile_options(cuopt_cancel_job PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" ) - target_include_directories(remote_solve_test + target_include_directories(cuopt_cancel_job PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" "${CMAKE_CURRENT_BINARY_DIR}/include" ) - target_link_libraries(remote_solve_test + target_link_libraries(cuopt_cancel_job PUBLIC cuopt ) - # Use RUNPATH so LD_LIBRARY_PATH can override during development - target_link_options(remote_solve_test PRIVATE -Wl,--enable-new-dtags) + target_link_options(cuopt_cancel_job PRIVATE -Wl,--enable-new-dtags) - # Local solve test (for debugging) - add_executable(local_solve_test tests/linear_programming/local_solve_test.cpp) - target_compile_options(local_solve_test - PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" - ) - target_include_directories(local_solve_test - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CMAKE_CURRENT_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" - "${CMAKE_CURRENT_BINARY_DIR}/include" - ) - target_link_libraries(local_solve_test - PUBLIC - cuopt + install(TARGETS cuopt_cancel_job + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} ) - target_link_options(local_solve_test PRIVATE -Wl,--enable-new-dtags) endif() # ################################################################################################## diff --git 
a/cpp/cuopt_cancel_job.cpp b/cpp/cuopt_cancel_job.cpp new file mode 100644 index 000000000..edd786530 --- /dev/null +++ b/cpp/cuopt_cancel_job.cpp @@ -0,0 +1,193 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file cuopt_cancel_job.cpp + * @brief Standalone utility to cancel jobs on a cuopt_remote_server + * + * Usage: + * cuopt_cancel_job [-h host] [-p port] + * + * Examples: + * cuopt_cancel_job job_1234567890abcdef + * cuopt_cancel_job job_1234567890abcdef -h 192.168.1.100 -p 9090 + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace cuopt::linear_programming; + +static bool write_all(int sockfd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(sockfd, ptr, remaining); + if (written <= 0) return false; + ptr += written; + remaining -= written; + } + return true; +} + +static bool read_all(int sockfd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t nread = ::read(sockfd, ptr, remaining); + if (nread <= 0) return false; + ptr += nread; + remaining -= nread; + } + return true; +} + +void print_usage(const char* prog) +{ + std::cout << "Usage: " << prog << " [options]\n" + << "\n" + << "Cancel a job on a cuopt_remote_server.\n" + << "\n" + << "Arguments:\n" + << " job_id The job ID to cancel\n" + << "\n" + << "Options:\n" + << " -h HOST Server hostname (default: localhost)\n" + << " -p PORT Server port (default: 9090)\n" + << " --help Show this help message\n" + << "\n" + << "Environment Variables:\n" + << " CUOPT_REMOTE_HOST Default server host\n" + << " CUOPT_REMOTE_PORT Default server port\n" + << "\n" + << "Examples:\n" + << " " << prog << " job_1234567890abcdef\n" + << " " 
<< prog << " job_1234567890abcdef -h 192.168.1.100 -p 9090\n"; +} + +int main(int argc, char** argv) +{ + // Parse arguments + std::string job_id; + std::string host = "localhost"; + int port = 9090; + + // Check environment variables first + const char* env_host = std::getenv("CUOPT_REMOTE_HOST"); + const char* env_port = std::getenv("CUOPT_REMOTE_PORT"); + if (env_host && env_host[0]) { host = env_host; } + if (env_port && env_port[0]) { port = std::atoi(env_port); } + + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--help") == 0) { + print_usage(argv[0]); + return 0; + } else if (strcmp(argv[i], "-h") == 0 && i + 1 < argc) { + host = argv[++i]; + } else if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) { + port = std::stoi(argv[++i]); + } else if (argv[i][0] != '-') { + job_id = argv[i]; + } + } + + if (job_id.empty()) { + std::cerr << "Error: No job_id specified\n\n"; + print_usage(argv[0]); + return 1; + } + + // Connect to server + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) { + std::cerr << "Error: Failed to create socket\n"; + return 1; + } + + struct sockaddr_in server_addr; + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(port); + + if (inet_pton(AF_INET, host.c_str(), &server_addr.sin_addr) <= 0) { + // Try hostname resolution + struct hostent* he = gethostbyname(host.c_str()); + if (he == nullptr) { + std::cerr << "Error: Invalid host: " << host << "\n"; + close(sockfd); + return 1; + } + memcpy(&server_addr.sin_addr, he->h_addr_list[0], he->h_length); + } + + if (connect(sockfd, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) { + std::cerr << "Error: Failed to connect to " << host << ":" << port << "\n"; + close(sockfd); + return 1; + } + + // Send cancel request + auto serializer = get_serializer(); + auto request_bytes = serializer->serialize_cancel_request(job_id); + + uint32_t size = request_bytes.size(); + if (!write_all(sockfd, &size, 
sizeof(size)) || + !write_all(sockfd, request_bytes.data(), request_bytes.size())) { + std::cerr << "Error: Failed to send cancel request\n"; + close(sockfd); + return 1; + } + + // Receive response + if (!read_all(sockfd, &size, sizeof(size))) { + std::cerr << "Error: Failed to receive response size\n"; + close(sockfd); + return 1; + } + + std::vector response_bytes(size); + if (!read_all(sockfd, response_bytes.data(), size)) { + std::cerr << "Error: Failed to receive response\n"; + close(sockfd); + return 1; + } + + close(sockfd); + + // Parse response + auto result = serializer->deserialize_cancel_response(response_bytes); + + // Print result + std::cout << "Job ID: " << job_id << "\n"; + std::cout << "Result: " << (result.success ? "SUCCESS" : "FAILED") << "\n"; + std::cout << "Message: " << result.message << "\n"; + + const char* status_str = "UNKNOWN"; + using job_status_t = remote_serializer_t::job_status_t; + switch (result.job_status) { + case job_status_t::QUEUED: status_str = "QUEUED"; break; + case job_status_t::PROCESSING: status_str = "PROCESSING"; break; + case job_status_t::COMPLETED: status_str = "COMPLETED"; break; + case job_status_t::FAILED: status_str = "FAILED"; break; + case job_status_t::NOT_FOUND: status_str = "NOT_FOUND"; break; + case job_status_t::CANCELLED: status_str = "CANCELLED"; break; + } + std::cout << "Job Status: " << status_str << "\n"; + + return result.success ? 
0 : 1; +} diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 990853fce..b19c51580 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -71,6 +71,7 @@ struct JobQueueEntry { std::atomic ready; // Job is ready to be processed std::atomic claimed; // Worker has claimed this job std::atomic worker_pid; // PID of worker that claimed this job (0 if none) + std::atomic cancelled; // Job has been cancelled (worker should skip) }; struct ResultQueueEntry { @@ -201,7 +202,7 @@ class stdout_streamer_t { // Job status tracking (main process only) // ============================================================================ -enum class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; +enum class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND, CANCELLED }; struct JobInfo { std::string job_id; @@ -367,6 +368,7 @@ bool init_shared_memory() job_queue[i].ready = false; job_queue[i].claimed = false; job_queue[i].worker_pid = 0; + job_queue[i].cancelled = false; } // Create result queue shared memory @@ -486,6 +488,35 @@ void worker_process(int worker_id) std::string job_id(job.job_id); bool is_mip = (job.problem_type == 1); + // Check if job was cancelled before we start processing + if (job.cancelled) { + std::cout << "[Worker " << worker_id << "] Job cancelled before processing: " << job_id + << "\n"; + std::cout.flush(); + + // Store cancelled result in result queue + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + strncpy(result_queue[i].job_id, job_id.c_str(), sizeof(result_queue[i].job_id) - 1); + result_queue[i].status = 2; // Cancelled status + result_queue[i].data_size = 0; + strncpy(result_queue[i].error_message, + "Job was cancelled", + sizeof(result_queue[i].error_message) - 1); + result_queue[i].retrieved = false; + result_queue[i].ready = true; + break; + } + } + + // Clear job slot (don't exit/restart worker) + job.worker_pid = 0; + job.ready = false; + 
job.claimed = false; + job.cancelled = false; + continue; // Go back to waiting for next job + } + std::cout << "[Worker " << worker_id << "] Processing job: " << job_id << " (type: " << (is_mip ? "MIP" : "LP") << ")\n"; std::cout.flush(); @@ -564,6 +595,7 @@ void worker_process(int worker_id) job.worker_pid = 0; job.ready = false; job.claimed = false; + job.cancelled = false; std::cout << "[Worker " << worker_id << "] Completed job: " << job_id << " (success: " << success << ")\n"; @@ -615,23 +647,30 @@ pid_t spawn_single_worker(int worker_id) return pid; } -// Mark jobs being processed by a dead worker as failed +// Mark jobs being processed by a dead worker as failed (or cancelled if it was cancelled) void mark_worker_jobs_failed(pid_t dead_worker_pid) { for (size_t i = 0; i < MAX_JOBS; ++i) { if (job_queue[i].ready && job_queue[i].claimed && job_queue[i].worker_pid == dead_worker_pid) { std::string job_id(job_queue[i].job_id); - std::cerr << "[Server] Worker " << dead_worker_pid << " died while processing job: " << job_id - << "\n"; + bool was_cancelled = job_queue[i].cancelled; - // Store failure result in result queue + if (was_cancelled) { + std::cerr << "[Server] Worker " << dead_worker_pid + << " killed for cancelled job: " << job_id << "\n"; + } else { + std::cerr << "[Server] Worker " << dead_worker_pid + << " died while processing job: " << job_id << "\n"; + } + + // Store result in result queue (cancelled or failed) for (size_t j = 0; j < MAX_RESULTS; ++j) { if (!result_queue[j].ready) { strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1); - result_queue[j].status = 1; // Error + result_queue[j].status = was_cancelled ? 2 : 1; // 2=cancelled, 1=error result_queue[j].data_size = 0; strncpy(result_queue[j].error_message, - "Worker process died unexpectedly", + was_cancelled ? 
"Job was cancelled" : "Worker process died unexpectedly", sizeof(result_queue[j].error_message) - 1); result_queue[j].retrieved = false; result_queue[j].ready = true; @@ -643,14 +682,20 @@ void mark_worker_jobs_failed(pid_t dead_worker_pid) job_queue[i].worker_pid = 0; job_queue[i].ready = false; job_queue[i].claimed = false; + job_queue[i].cancelled = false; // Update job tracker { std::lock_guard lock(tracker_mutex); auto it = job_tracker.find(job_id); if (it != job_tracker.end()) { - it->second.status = JobStatus::FAILED; - it->second.error_message = "Worker process died unexpectedly"; + if (was_cancelled) { + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job was cancelled"; + } else { + it->second.status = JobStatus::FAILED; + it->second.error_message = "Worker process died unexpectedly"; + } } } } @@ -736,7 +781,9 @@ void result_retrieval_thread() for (size_t i = 0; i < MAX_RESULTS; ++i) { if (result_queue[i].ready && !result_queue[i].retrieved) { std::string job_id(result_queue[i].job_id); - bool success = (result_queue[i].status == 0); + uint32_t result_status = result_queue[i].status; + bool success = (result_status == 0); + bool cancelled = (result_status == 2); std::vector result_data; std::string error_message; @@ -771,6 +818,9 @@ void result_retrieval_thread() if (success) { it->second.status = JobStatus::COMPLETED; it->second.result_data = result_data; + } else if (cancelled) { + it->second.status = JobStatus::CANCELLED; + it->second.error_message = error_message; } else { it->second.status = JobStatus::FAILED; it->second.error_message = error_message; @@ -814,6 +864,7 @@ std::pair submit_job_async(const std::vector& reques memcpy(job_queue[i].data, request_data.data(), request_data.size()); job_queue[i].worker_pid = 0; job_queue[i].claimed = false; + job_queue[i].cancelled = false; job_queue[i].ready = true; // Mark as ready last // Track job @@ -848,11 +899,24 @@ JobStatus check_job_status(const std::string& job_id, 
std::string& message) return JobStatus::NOT_FOUND; } + // If status is QUEUED, check if the job has been claimed by a worker + // (which means it's now PROCESSING) + if (it->second.status == JobStatus::QUEUED) { + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && job_queue[i].claimed && + std::string(job_queue[i].job_id) == job_id) { + it->second.status = JobStatus::PROCESSING; + break; + } + } + } + switch (it->second.status) { case JobStatus::QUEUED: message = "Job is queued"; break; case JobStatus::PROCESSING: message = "Job is being processed"; break; case JobStatus::COMPLETED: message = "Job completed"; break; case JobStatus::FAILED: message = "Job failed: " + it->second.error_message; break; + case JobStatus::CANCELLED: message = "Job was cancelled"; break; default: message = "Unknown status"; } @@ -884,6 +948,73 @@ bool get_job_result(const std::string& job_id, } } +// Wait for job to complete (blocking) +// This uses condition variables - the thread will sleep until the job is done +bool wait_for_result(const std::string& job_id, + std::vector& result_data, + std::string& error_message) +{ + // First check if job already completed + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { + error_message = "Job ID not found"; + return false; + } + + // If already in terminal state, return immediately + if (it->second.status == JobStatus::COMPLETED) { + result_data = it->second.result_data; + return true; + } else if (it->second.status == JobStatus::FAILED) { + error_message = it->second.error_message; + return false; + } else if (it->second.status == JobStatus::CANCELLED) { + error_message = "Job was cancelled"; + return false; + } + } + + // Job is still running - create a waiter and wait on condition variable + auto waiter = std::make_shared(); + + { + std::lock_guard lock(waiters_mutex); + waiting_threads[job_id] = waiter; + } + + if (config.verbose) { + std::cout << "[Server] 
WAIT_FOR_RESULT: waiting for job " << job_id << "\n"; + } + + // Wait on the condition variable - this thread will sleep until signaled + { + std::unique_lock lock(waiter->mutex); + waiter->cv.wait(lock, [&waiter] { return waiter->ready; }); + } + + // Remove from waiting_threads + { + std::lock_guard lock(waiters_mutex); + waiting_threads.erase(job_id); + } + + if (config.verbose) { + std::cout << "[Server] WAIT_FOR_RESULT: job " << job_id + << " completed, success=" << waiter->success << "\n"; + } + + if (waiter->success) { + result_data = std::move(waiter->result_data); + return true; + } else { + error_message = waiter->error_message; + return false; + } +} + // ============================================================================ // Log File Management // ============================================================================ @@ -926,6 +1057,121 @@ bool delete_job(const std::string& job_id) return true; } +// Cancel job - returns: 0=success, 1=job_not_found, 2=already_completed, 3=already_cancelled +// Also returns the job's status after cancel attempt via job_status_out +int cancel_job(const std::string& job_id, JobStatus& job_status_out, std::string& message) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { + message = "Job ID not found"; + job_status_out = JobStatus::NOT_FOUND; + return 1; + } + + JobStatus current_status = it->second.status; + + // Can't cancel completed jobs + if (current_status == JobStatus::COMPLETED) { + message = "Cannot cancel completed job"; + job_status_out = JobStatus::COMPLETED; + return 2; + } + + // Already cancelled + if (current_status == JobStatus::CANCELLED) { + message = "Job already cancelled"; + job_status_out = JobStatus::CANCELLED; + return 3; + } + + // Can't cancel failed jobs + if (current_status == JobStatus::FAILED) { + message = "Cannot cancel failed job"; + job_status_out = JobStatus::FAILED; + return 2; + } + + // Find the job in the 
shared memory queue + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && strcmp(job_queue[i].job_id, job_id.c_str()) == 0) { + // Check if job is being processed by a worker + pid_t worker_pid = job_queue[i].worker_pid; + + if (worker_pid > 0 && job_queue[i].claimed) { + // Job is being processed - kill the worker + if (config.verbose) { + std::cout << "[Server] Cancelling running job " << job_id << " (killing worker " + << worker_pid << ")\n"; + } + kill(worker_pid, SIGKILL); + // The worker monitor thread will detect the dead worker, restart it, + // and mark_worker_jobs_failed will be called. But we want CANCELLED not FAILED. + // So we mark it as cancelled here first. + job_queue[i].cancelled = true; + } else { + // Job is queued but not yet claimed - mark as cancelled + if (config.verbose) { std::cout << "[Server] Cancelling queued job " << job_id << "\n"; } + job_queue[i].cancelled = true; + } + + // Update job tracker + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job cancelled by user"; + job_status_out = JobStatus::CANCELLED; + message = "Job cancelled successfully"; + + // Delete the log file for this job + delete_log_file(job_id); + + // Wake up any threads waiting for this job + { + std::lock_guard wlock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + auto waiter = wit->second; + waiter->error_message = "Job cancelled by user"; + waiter->success = false; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + return 0; + } + } + + // Job not found in queue (might have already finished processing) + // Re-check status since we hold the lock + if (it->second.status == JobStatus::COMPLETED) { + message = "Cannot cancel completed job"; + job_status_out = JobStatus::COMPLETED; + return 2; + } + + // Job must be in flight or in an edge case - mark as cancelled anyway + it->second.status = JobStatus::CANCELLED; + it->second.error_message = "Job cancelled by 
user"; + job_status_out = JobStatus::CANCELLED; + message = "Job cancelled"; + + // Wake up any threads waiting for this job + { + std::lock_guard wlock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + auto waiter = wit->second; + waiter->error_message = "Job cancelled by user"; + waiter->success = false; + waiter->ready = true; + waiter->cv.notify_one(); + } + } + + return 0; +} + // ============================================================================ // Sync Mode Handler (with log streaming) // ============================================================================ @@ -1071,6 +1317,7 @@ void handle_client(int client_fd, bool stream_logs) case JobStatus::COMPLETED: status_code = 2; break; case JobStatus::FAILED: status_code = 3; break; case JobStatus::NOT_FOUND: status_code = 4; break; + case JobStatus::CANCELLED: status_code = 5; break; } auto response = serializer->serialize_status_response(status_code, message); @@ -1140,6 +1387,59 @@ void handle_client(int client_fd, bool stream_logs) std::cout << "[Server] GET_LOGS: job=" << job_id << ", frombyte=" << frombyte << ", lines=" << log_lines.size() << ", nbytes=" << nbytes << "\n"; } + } else if (request_type == 5) { // CANCEL_JOB + std::string job_id = serializer->get_job_id(request_data); + + JobStatus job_status_out; + std::string message; + int result = cancel_job(job_id, job_status_out, message); + + // Convert JobStatus to status code + int status_code = 0; + switch (job_status_out) { + case JobStatus::QUEUED: status_code = 0; break; + case JobStatus::PROCESSING: status_code = 1; break; + case JobStatus::COMPLETED: status_code = 2; break; + case JobStatus::FAILED: status_code = 3; break; + case JobStatus::NOT_FOUND: status_code = 4; break; + case JobStatus::CANCELLED: status_code = 5; break; + } + + bool success = (result == 0); + auto response = serializer->serialize_cancel_response(success, message, status_code); + + uint32_t size = 
response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] CANCEL_JOB: job=" << job_id << ", success=" << success + << ", message=" << message << "\n"; + } + } else if (request_type == 6) { // WAIT_FOR_RESULT + std::string job_id = serializer->get_job_id(request_data); + + if (config.verbose) { + std::cout << "[Server] WAIT_FOR_RESULT: job=" << job_id << " (blocking until complete)\n"; + } + + std::vector result_data; + std::string error_message; + + // This will block until the job completes (uses condition variable, no polling) + bool success = wait_for_result(job_id, result_data, error_message); + + // Send result response (same format as GET_RESULT) + auto response = serializer->serialize_result_response(success, result_data, error_message); + + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + + if (config.verbose) { + std::cout << "[Server] WAIT_FOR_RESULT: job=" << job_id << " completed, success=" << success + << "\n"; + } } close(client_fd); @@ -1203,10 +1503,13 @@ int main(int argc, char** argv) std::cout << "Log streaming: " << (config.stream_logs ? 
"enabled" : "disabled") << "\n"; std::cout << "\n"; std::cout << "Async API:\n"; - std::cout << " SUBMIT_JOB - Submit a job, get job_id\n"; - std::cout << " CHECK_STATUS - Check job status\n"; - std::cout << " GET_RESULT - Retrieve completed result\n"; - std::cout << " DELETE_RESULT - Delete job from server\n"; + std::cout << " SUBMIT_JOB - Submit a job, get job_id\n"; + std::cout << " CHECK_STATUS - Check job status\n"; + std::cout << " GET_RESULT - Retrieve completed result\n"; + std::cout << " DELETE_RESULT - Delete job from server\n"; + std::cout << " GET_LOGS - Retrieve log output\n"; + std::cout << " CANCEL_JOB - Cancel a queued or running job\n"; + std::cout << " WAIT_FOR_RESULT - Block until job completes (no polling)\n"; std::cout << "\n"; // Initialize shared memory diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp index ff28e04bc..c7bf61da0 100644 --- a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -205,10 +205,18 @@ class remote_serializer_t { virtual std::vector serialize_get_logs_request(const std::string& job_id, int64_t frombyte = 0) = 0; + /** + * @brief Serialize a cancel job request. + * + * @param job_id The job ID to cancel + * @return Serialized request bytes + */ + virtual std::vector serialize_cancel_request(const std::string& job_id) = 0; + /** * @brief Job status enumeration. */ - enum class job_status_t { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND }; + enum class job_status_t { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND, CANCELLED }; /** * @brief Structure to hold log retrieval results. @@ -219,6 +227,15 @@ class remote_serializer_t { bool job_exists; ///< False if job_id not found }; + /** + * @brief Structure to hold cancel response results. 
+ */ + struct cancel_result_t { + bool success; ///< True if cancel request was processed + std::string message; ///< Success/error message + job_status_t job_status; ///< Status of job after cancel attempt + }; + /** * @brief Deserialize job submission response. * @@ -265,6 +282,14 @@ class remote_serializer_t { */ virtual logs_result_t deserialize_logs_response(const std::vector& data) = 0; + /** + * @brief Deserialize cancel response. + * + * @param data Response bytes + * @return Cancel result structure + */ + virtual cancel_result_t deserialize_cancel_response(const std::vector& data) = 0; + //============================================================================ // Server-side Async Request Handling //============================================================================ @@ -283,7 +308,8 @@ class remote_serializer_t { * @brief Get the async request type. * * @param data The serialized request bytes - * @return Request type: 0=SUBMIT_JOB, 1=CHECK_STATUS, 2=GET_RESULT, 3=DELETE_RESULT + * @return Request type: 0=SUBMIT_JOB, 1=CHECK_STATUS, 2=GET_RESULT, 3=DELETE_RESULT, + * 4=GET_LOGS, 5=CANCEL_JOB, 6=WAIT_FOR_RESULT */ virtual int get_async_request_type(const std::vector& data) = 0; @@ -373,6 +399,19 @@ class remote_serializer_t { int64_t nbytes, bool job_exists) = 0; + /** + * @brief Serialize a cancel response. 
+ * + * @param success Whether cancel was successful + * @param message Success/error message + * @param status_code Job status after cancel: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, + * 4=NOT_FOUND, 5=CANCELLED + * @return Serialized response bytes + */ + virtual std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) = 0; + //============================================================================ // Metadata //============================================================================ diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto index 35b7b0584..8f9af6bda 100644 --- a/cpp/src/linear_programming/utilities/cuopt_remote.proto +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -203,6 +203,8 @@ enum RequestType { GET_RESULT = 2; // Retrieve completed result DELETE_RESULT = 3; // Delete result from server GET_LOGS = 4; // Retrieve buffered log entries + CANCEL_JOB = 5; // Cancel a queued or running job + WAIT_FOR_RESULT = 6; // Block until job completes, returns result } // Job status for async operations @@ -212,6 +214,7 @@ enum JobStatus { COMPLETED = 2; // Job completed successfully FAILED = 3; // Job failed with error NOT_FOUND = 4; // Job ID not found + CANCELLED = 5; // Job was cancelled by user } // Generic request wrapper for async operations @@ -268,6 +271,13 @@ message LogsResponse { bool job_exists = 5; // False if job_id not found } +// Response for cancel job +message CancelResponse { + ResponseStatus status = 1; + string message = 2; + JobStatus job_status = 3; // Status of job after cancel attempt +} + // Generic response wrapper message AsyncResponse { RequestType request_type = 1; @@ -278,6 +288,7 @@ message AsyncResponse { ResultResponse result_response = 12; DeleteResponse delete_response = 13; LogsResponse logs_response = 14; + CancelResponse cancel_response = 15; } } diff --git 
a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 058a30b13..685f5cade 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -382,6 +382,17 @@ class protobuf_serializer_t : public remote_serializer_t { return result; } + std::vector serialize_cancel_request(const std::string& job_id) override + { + cuopt::remote::AsyncRequest request; + request.set_request_type(cuopt::remote::CANCEL_JOB); + request.set_job_id(job_id); + + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + bool deserialize_submit_response(const std::vector& data, std::string& job_id, std::string& error_message) override @@ -419,6 +430,7 @@ class protobuf_serializer_t : public remote_serializer_t { case cuopt::remote::PROCESSING: return job_status_t::PROCESSING; case cuopt::remote::COMPLETED: return job_status_t::COMPLETED; case cuopt::remote::FAILED: return job_status_t::FAILED; + case cuopt::remote::CANCELLED: return job_status_t::CANCELLED; case cuopt::remote::NOT_FOUND: default: return job_status_t::NOT_FOUND; } @@ -504,6 +516,36 @@ class protobuf_serializer_t : public remote_serializer_t { return result; } + typename remote_serializer_t::cancel_result_t deserialize_cancel_response( + const std::vector& data) override + { + typename remote_serializer_t::cancel_result_t result; + result.success = false; + result.message = "Failed to parse response"; + result.job_status = job_status_t::NOT_FOUND; + + cuopt::remote::AsyncResponse response; + if (!response.ParseFromArray(data.data(), data.size()) || !response.has_cancel_response()) { + return result; + } + + const auto& cancel = response.cancel_response(); + result.success = (cancel.status() == cuopt::remote::SUCCESS); + result.message = cancel.message(); + + switch (cancel.job_status()) { + case 
cuopt::remote::QUEUED: result.job_status = job_status_t::QUEUED; break; + case cuopt::remote::PROCESSING: result.job_status = job_status_t::PROCESSING; break; + case cuopt::remote::COMPLETED: result.job_status = job_status_t::COMPLETED; break; + case cuopt::remote::FAILED: result.job_status = job_status_t::FAILED; break; + case cuopt::remote::CANCELLED: result.job_status = job_status_t::CANCELLED; break; + case cuopt::remote::NOT_FOUND: + default: result.job_status = job_status_t::NOT_FOUND; break; + } + + return result; + } + //============================================================================ // Server-side Async Request Handling //============================================================================ @@ -537,6 +579,8 @@ class protobuf_serializer_t : public remote_serializer_t { case cuopt::remote::GET_RESULT: return 2; case cuopt::remote::DELETE_RESULT: return 3; case cuopt::remote::GET_LOGS: return 4; + case cuopt::remote::CANCEL_JOB: return 5; + case cuopt::remote::WAIT_FOR_RESULT: return 6; default: return -1; } } @@ -612,7 +656,8 @@ class protobuf_serializer_t : public remote_serializer_t { case 1: status->set_job_status(cuopt::remote::PROCESSING); break; case 2: status->set_job_status(cuopt::remote::COMPLETED); break; case 3: status->set_job_status(cuopt::remote::FAILED); break; - case 4: + case 4: status->set_job_status(cuopt::remote::NOT_FOUND); break; + case 5: status->set_job_status(cuopt::remote::CANCELLED); break; default: status->set_job_status(cuopt::remote::NOT_FOUND); break; } status->set_message(message); @@ -696,6 +741,32 @@ class protobuf_serializer_t : public remote_serializer_t { return bytes; } + std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) override + { + cuopt::remote::AsyncResponse response; + response.set_request_type(cuopt::remote::CANCEL_JOB); + + auto* cancel = response.mutable_cancel_response(); + cancel->set_status(success ? 
cuopt::remote::SUCCESS : cuopt::remote::ERROR_INTERNAL); + cancel->set_message(message); + + switch (status_code) { + case 0: cancel->set_job_status(cuopt::remote::QUEUED); break; + case 1: cancel->set_job_status(cuopt::remote::PROCESSING); break; + case 2: cancel->set_job_status(cuopt::remote::COMPLETED); break; + case 3: cancel->set_job_status(cuopt::remote::FAILED); break; + case 4: cancel->set_job_status(cuopt::remote::NOT_FOUND); break; + case 5: cancel->set_job_status(cuopt::remote::CANCELLED); break; + default: cancel->set_job_status(cuopt::remote::NOT_FOUND); break; + } + + std::vector bytes(response.ByteSizeLong()); + response.SerializeToArray(bytes.data(), bytes.size()); + return bytes; + } + //============================================================================ // Metadata //============================================================================ diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index e3e1a0334..02a4c649d 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -34,10 +34,13 @@ constexpr uint8_t MSG_STATUS_REQUEST = 12; constexpr uint8_t MSG_GET_RESULT = 13; constexpr uint8_t MSG_DELETE_REQUEST = 14; constexpr uint8_t MSG_GET_LOGS = 15; +constexpr uint8_t MSG_CANCEL_REQUEST = 16; +constexpr uint8_t MSG_WAIT_REQUEST = 17; constexpr uint8_t MSG_SUBMIT_RESPONSE = 20; constexpr uint8_t MSG_STATUS_RESPONSE = 21; constexpr uint8_t MSG_LOGS_RESPONSE = 22; +constexpr uint8_t MSG_CANCEL_RESPONSE = 23; template class msgpack_serializer_t : public remote_serializer_t { @@ -452,6 +455,17 @@ class msgpack_serializer_t : public remote_serializer_t { return std::vector(buffer.data(), buffer.data() + buffer.size()); } + std::vector serialize_cancel_request(const std::string& job_id) override + { + msgpack::sbuffer buffer; + 
msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_CANCEL_REQUEST); + pk.pack(false); // blocking (unused) + pk.pack(job_id); + pk.pack(int64_t(0)); // frombyte (unused) + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + bool deserialize_submit_response(const std::vector& data, std::string& job_id, std::string& error_message) override @@ -504,12 +518,14 @@ class msgpack_serializer_t : public remote_serializer_t { msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); int status = oh_status.get().as(); - // Status codes: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND + // Status codes: 0=QUEUED, 1=PROCESSING, 2=COMPLETED, 3=FAILED, 4=NOT_FOUND, 5=CANCELLED switch (status) { case 0: return job_status_t::QUEUED; case 1: return job_status_t::PROCESSING; case 2: return job_status_t::COMPLETED; case 3: return job_status_t::FAILED; + case 4: return job_status_t::NOT_FOUND; + case 5: return job_status_t::CANCELLED; default: return job_status_t::NOT_FOUND; } } catch (...) 
{ @@ -549,6 +565,49 @@ class msgpack_serializer_t : public remote_serializer_t { return result; } + typename remote_serializer_t::cancel_result_t deserialize_cancel_response( + const std::vector& data) override + { + using job_status_t = typename remote_serializer_t::job_status_t; + typename remote_serializer_t::cancel_result_t result; + result.success = false; + result.message = "Failed to parse response"; + result.job_status = job_status_t::NOT_FOUND; + + try { + size_t offset = 0; + msgpack::object_handle oh_type = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + uint8_t msg_type = oh_type.get().as(); + + if (msg_type != MSG_CANCEL_RESPONSE) { return result; } + + msgpack::object_handle oh_success = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.success = oh_success.get().as(); + + msgpack::object_handle oh_message = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + result.message = oh_message.get().as(); + + msgpack::object_handle oh_status = + msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); + int status_code = oh_status.get().as(); + + switch (status_code) { + case 0: result.job_status = job_status_t::QUEUED; break; + case 1: result.job_status = job_status_t::PROCESSING; break; + case 2: result.job_status = job_status_t::COMPLETED; break; + case 3: result.job_status = job_status_t::FAILED; break; + case 4: result.job_status = job_status_t::NOT_FOUND; break; + case 5: result.job_status = job_status_t::CANCELLED; break; + default: result.job_status = job_status_t::NOT_FOUND; break; + } + } catch (...) 
{ + } + return result; + } + optimization_problem_solution_t deserialize_lp_result_response( const std::vector& data) override { @@ -574,7 +633,7 @@ class msgpack_serializer_t : public remote_serializer_t { msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); if (oh.get().type == msgpack::type::POSITIVE_INTEGER) { uint8_t msg_type = oh.get().as(); - return msg_type >= MSG_ASYNC_LP_REQUEST && msg_type <= MSG_GET_LOGS; + return msg_type >= MSG_ASYNC_LP_REQUEST && msg_type <= MSG_CANCEL_REQUEST; } } catch (...) { } @@ -674,6 +733,8 @@ class msgpack_serializer_t : public remote_serializer_t { case MSG_GET_RESULT: return 2; // GET_RESULT case MSG_DELETE_REQUEST: return 3; // DELETE_RESULT case MSG_GET_LOGS: return 4; // GET_LOGS + case MSG_CANCEL_REQUEST: return 5; // CANCEL_JOB + case MSG_WAIT_REQUEST: return 6; // WAIT_FOR_RESULT default: return -1; } } catch (...) { @@ -762,6 +823,19 @@ class msgpack_serializer_t : public remote_serializer_t { return std::vector(buffer.data(), buffer.data() + buffer.size()); } + std::vector serialize_cancel_response(bool success, + const std::string& message, + int status_code) override + { + msgpack::sbuffer buffer; + msgpack::packer pk(&buffer); + pk.pack_uint8(MSG_CANCEL_RESPONSE); + pk.pack(success); + pk.pack(message); + pk.pack(status_code); + return std::vector(buffer.data(), buffer.data() + buffer.size()); + } + private: void pack_problem(msgpack::packer& pk, const mps_parser::data_model_view_t& view) diff --git a/docs/remote_solve_architecture.md b/docs/remote_solve_architecture.md new file mode 100644 index 000000000..157f93b17 --- /dev/null +++ b/docs/remote_solve_architecture.md @@ -0,0 +1,556 @@ +# cuOpt Remote Solve Architecture + +## Document Purpose + +This document describes the client-server architecture for cuOpt's remote solve capability. It is intended for security review and covers communication protocols, process architecture, data flow, and trust boundaries. + +--- + +## 1. 
System Overview + +The remote solve feature allows clients to submit optimization problems (LP/MIP) to a server for execution on GPU-accelerated hardware. The architecture supports both synchronous (blocking) and asynchronous (job-based) operation modes. + +### High-Level Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CLIENT PROCESS │ +│ ┌─────────────────┐ ┌──────────────────┐ ┌───────────────────────┐ │ +│ │ cuOpt Library │───▶│ Remote Serializer│───▶│ TCP Socket Client │ │ +│ │ (User Code) │ │ (Protobuf/Custom)│ │ │ │ +│ └─────────────────┘ └──────────────────┘ └───────────┬───────────┘ │ +└──────────────────────────────────────────────────────────────┼──────────────┘ + │ + TCP Connection │ Port 9090 + (Binary Protocol)│ + ▼ +┌──────────────────────────────────────────────────────────────┼──────────────┐ +│ SERVER PROCESS │ │ +│ ┌───────────────────────────────────────────────────────────┴───────────┐ │ +│ │ Main Server Thread │ │ +│ │ - Accept connections (thread-per-connection) │ │ +│ │ - Parse requests via pluggable serializer │ │ +│ │ - Route to sync handler or async job queue │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────┼─────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ │ +│ │ Result Retrieval │ │ Worker Monitor │ │ Connection │ │ +│ │ Thread │ │ Thread │ │ Handler Threads │ │ +│ └─────────┬─────────┘ └─────────┬─────────┘ └───────────────────┘ │ +│ │ │ │ +│ │ POSIX Shared Memory (Job Queue, Result Queue) │ +│ │ │ │ +│ ┌─────────┴──────────────────────┴─────────────────────────────────────┐ │ +│ │ Shared Memory Region │ │ +│ │ ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────┐ │ │ +│ │ │ Job Queue │ │ Result Queue │ │ Control Block │ │ │ +│ │ │ (MAX_JOBS=64) │ │ (MAX_RESULTS=64) │ │ (shutdown flag) │ │ │ +│ │ └─────────────────┘ └──────────────────┘ 
└─────────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────┼─────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ ┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ │ +│ │ Worker Process │ │ Worker Process │ │ Worker Process │ │ +│ │ (fork) │ │ (fork) │ │ (fork) │ │ +│ │ - GPU Solver │ │ - GPU Solver │ │ - GPU Solver │ │ +│ │ - Isolated │ │ - Isolated │ │ - Isolated │ │ +│ └───────────────────┘ └───────────────────┘ └───────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 2. Components + +### 2.1 Client Components + +| Component | Description | +|-----------|-------------| +| **cuOpt Library** | User-facing API (`solve_lp_remote`, `solve_mip_remote`) | +| **Remote Serializer** | Pluggable serialization (default: Protocol Buffers) | +| **TCP Client** | Socket connection to server, length-prefixed messages | + +### 2.2 Server Components + +| Component | Description | +|-----------|-------------| +| **Main Thread** | Accepts TCP connections, spawns handler threads | +| **Connection Handlers** | Per-connection threads that parse and route requests | +| **Result Retrieval Thread** | Polls shared memory for completed results | +| **Worker Monitor Thread** | Monitors worker processes via `waitpid()`, restarts dead workers | +| **Worker Processes** | Forked processes that execute GPU solves | +| **Shared Memory** | POSIX shared memory for IPC between main process and workers | + +--- + +## 3. 
Communication Protocol + +### 3.1 Transport Layer + +- **Protocol**: TCP/IP +- **Default Port**: 9090 (configurable via `-p` flag) +- **Encryption**: None (plaintext) - **Security Note: TLS not implemented** +- **Authentication**: None - **Security Note: No auth mechanism** + +### 3.2 Message Format + +All messages use a simple length-prefixed binary format: + +``` +┌────────────────┬─────────────────────────────────┐ +│ Length (4B) │ Payload (N bytes) │ +│ Little-endian │ Serialized Protobuf/Custom │ +└────────────────┴─────────────────────────────────┘ +``` + +### 3.3 Request Types + +| Code | Type | Description | +|------|------|-------------| +| 0 | `SUBMIT_JOB` | Submit optimization problem, receive job_id | +| 1 | `CHECK_STATUS` | Query job status by job_id | +| 2 | `GET_RESULT` | Retrieve solution for completed job | +| 3 | `DELETE_RESULT` | Delete job and free resources | +| 4 | `GET_LOGS` | Retrieve solver output logs | +| 5 | `CANCEL_JOB` | Cancel queued or running job | +| 6 | `WAIT_FOR_RESULT` | Block until job completes (no polling) | + +### 3.4 Job Status Codes + +| Code | Status | Description | +|------|--------|-------------| +| 0 | `QUEUED` | Job submitted, waiting for worker | +| 1 | `PROCESSING` | Worker is solving the problem | +| 2 | `COMPLETED` | Solution available | +| 3 | `FAILED` | Solve failed with error | +| 4 | `NOT_FOUND` | Job ID does not exist | +| 5 | `CANCELLED` | Job was cancelled by user | + +--- + +## 4. 
Sequence Diagrams + +### 4.1 Asynchronous Job Flow (Normal Case) + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (problem) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ Generate job_id │ + │ │ Write to Job Queue (shm) │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Poll Job Queue │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Claim Job │ + │ │ (set claimed=true, │ + │ │ worker_pid) │ + │ │──────────────────────────▶│ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (PROCESSING) │ Execute GPU Solve │ + │◀──────────────────────────│ │ + │ │ │ + │ ... │ ... │ + │ │ │ + │ │ Write Result (shm) │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result Retrieval Thread │ + │ │ updates job_tracker │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (COMPLETED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ DELETE_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (OK) │ │ + │◀──────────────────────────│ │ +``` + +### 4.2 WAIT_FOR_RESULT Flow (Blocking Wait) + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (problem) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ WAIT_FOR_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ Handler thread creates │ + │ │ JobWaiter with CV │ + │ │ │ + │ (connection held │ Thread blocks on │ + │ open, no response │ condition_variable.wait() │ + │ yet) │ │ + │ │ │ + │ │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ │ Write Result (shm) │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result thread signals │ + │ │ condition_variable │ + │ │ │ + │ │ Handler thread wakes │ + │ │ │ + │ Response (solution) │ │ + 
│◀──────────────────────────│ │ +``` + +### 4.3 Job Cancellation Flow + +``` +Client A Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB │ │ + │──────────────────────────▶│ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Worker claims job │ + │ │◀──────────────────────────│ + │ │ │ +Client B │ Solving... │ + │ │ │ + │ CANCEL_JOB (job_id) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ │ kill(worker_pid, SIGKILL) │ + │ │──────────────────────────▶│ + │ │ │ + │ │ Worker dies │ + │ │ ✗ + │ │ │ + │ Response (CANCELLED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Monitor thread detects │ + │ │ dead worker via waitpid() │ + │ │ │ + │ │ Restart worker (fork) │ + │ │ │ + │ │ New Worker │ + │ │◀──────────────────────────│ +``` + +### 4.4 Worker Crash Recovery + +``` +Server (Main) Worker Process + │ │ + │ Worker processing job │ + │◀────────────────────────────────────▶│ + │ │ + │ CRASH/SEGFAULT │ + │ ✗ + │ │ + │ Worker Monitor Thread │ + │ waitpid() returns │ + │ │ + │ Mark job as FAILED │ + │ Signal any waiting threads │ + │ │ + │ fork() new worker │ + │──────────────────────────────────────▶ + │ │ + │ New Worker Ready │ + │◀─────────────────────────────────────│ +``` + +--- + +## 5. 
Shared Memory Architecture + +### 5.1 Memory Regions + +Three POSIX shared memory segments are created: + +| Name | Size | Purpose | +|------|------|---------| +| `/cuopt_job_queue` | ~64MB | Pending job entries | +| `/cuopt_result_queue` | ~64MB | Completed job results | +| `/cuopt_control` | ~64B | Shutdown flag, control signals | + +### 5.2 Job Queue Entry Structure + +```cpp +struct JobQueueEntry { + char job_id[32]; // Unique job identifier + uint32_t problem_type; // 0=LP, 1=MIP + uint32_t data_size; // Size of serialized problem + uint8_t data[MAX_JOB_DATA]; // Serialized problem data (~1MB) + pid_t worker_pid; // PID of worker processing this job + std::atomic ready; // Job ready for processing + std::atomic claimed; // Job claimed by a worker + std::atomic cancelled; // Job cancelled by user +}; +``` + +### 5.3 Result Queue Entry Structure + +```cpp +struct ResultQueueEntry { + char job_id[32]; // Job identifier + uint32_t status; // 0=success, 1=error, 2=cancelled + uint32_t data_size; // Size of result data + uint8_t data[MAX_RESULT_DATA]; // Serialized solution (~1MB) + char error_message[256]; // Error message if failed + std::atomic ready; // Result ready for retrieval + std::atomic retrieved; // Result has been retrieved +}; +``` + +--- + +## 6. Process Model + +### 6.1 Process Hierarchy + +``` +cuopt_remote_server (main process) +├── Result Retrieval Thread +├── Worker Monitor Thread +├── Connection Handler Thread (per client) +├── Connection Handler Thread (per client) +│ ... +├── Worker Process 0 (forked) +├── Worker Process 1 (forked) +│ ... 
+└── Worker Process N (forked) +``` + +### 6.2 Worker Isolation + +- Each worker is a separate process created via `fork()` +- Workers have independent memory spaces (except shared memory regions) +- GPU context is isolated per worker process +- Worker crash does not affect main server or other workers +- Workers are automatically restarted by monitor thread + +### 6.3 Resource Limits + +| Resource | Current Limit | Notes | +|----------|---------------|-------| +| Max concurrent jobs | 64 (`MAX_JOBS`) | Compile-time constant | +| Max job data size | ~1MB (`MAX_JOB_DATA_SIZE`) | Per job | +| Max result data size | ~1MB (`MAX_RESULT_DATA_SIZE`) | Per job | +| Max workers | Configurable (`-w` flag) | Default: 1 | +| Connection timeout | None | **Security Note** | + +--- + +## 7. Security Considerations + +### 7.1 Current Security Posture + +| Aspect | Status | Risk Level | +|--------|--------|------------| +| Transport encryption (TLS) | Not implemented | **HIGH** | +| Client authentication | Not implemented | **HIGH** | +| Authorization | Not implemented | **MEDIUM** | +| Input validation | Basic size checks | **MEDIUM** | +| Rate limiting | Not implemented | **MEDIUM** | +| Connection limits | Not implemented | **MEDIUM** | +| Resource quotas | Not implemented | **LOW** | + +### 7.2 Trust Boundaries + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ UNTRUSTED ZONE │ +│ │ +│ ┌─────────────┐ │ +│ │ Client │ Network boundary (no encryption, no auth) │ +│ └──────┬──────┘ │ +│ │ │ +└──────────┼───────────────────────────────────────────────────────────┘ + │ TCP Port 9090 + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ TRUSTED ZONE │ +│ │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ Server Process │ │ +│ │ - All clients treated equally │ │ +│ │ - No per-user isolation │ │ +│ │ - Shared job queue visible to all workers │ │ +│ 
└──────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ Worker Processes │ │ +│ │ - Process-level isolation from each other │ │ +│ │ - No sandboxing (full system access) │ │ +│ │ - GPU access │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +### 7.3 Attack Surface + +| Attack Vector | Description | Mitigation | +|---------------|-------------|------------| +| Network eavesdropping | Plaintext TCP traffic | Requires TLS implementation | +| Unauthorized access | No authentication | Requires auth mechanism | +| Denial of service | No rate limiting, connection limits | Requires implementation | +| Malformed input | Invalid protobuf/msgpack | Protobuf parsing with error handling | +| Resource exhaustion | Large jobs, many connections | Size limits, but no connection limits | +| Job ID guessing | Sequential-ish IDs | Uses random hex (128-bit) | +| Worker escape | Malicious solver code | Workers are forked, not sandboxed | + +### 7.4 Recommended Security Enhancements + +1. **TLS/SSL**: Add transport encryption for all client-server communication +2. **Authentication**: Implement API key or certificate-based authentication +3. **Authorization**: Per-user job isolation, prevent access to other users' jobs +4. **Rate Limiting**: Limit requests per client per time window +5. **Connection Limits**: Maximum concurrent connections per IP +6. **Input Validation**: Deep validation of problem data before solving +7. **Audit Logging**: Log all operations with client identity +8. **Sandboxing**: Consider containerization or seccomp for workers + +--- + +## 8. 
Data Flow Summary + +### 8.1 Problem Data Flow + +``` +Client Problem Data + │ + ▼ +┌───────────────────┐ +│ Serialization │ Client-side: optimization_problem_t → protobuf bytes +└─────────┬─────────┘ + │ + ▼ (TCP) +┌───────────────────┐ +│ Job Queue │ Stored in shared memory (~1MB max) +└─────────┬─────────┘ + │ + ▼ (shm read) +┌───────────────────┐ +│ Worker Process │ Deserialize, create GPU data structures +└─────────┬─────────┘ + │ + ▼ +┌───────────────────┐ +│ GPU Solver │ PDLP or MIP solver execution +└─────────┬─────────┘ + │ + ▼ +┌───────────────────┐ +│ Solution │ GPU → Host copy, serialize to bytes +└─────────┬─────────┘ + │ + ▼ (shm write) +┌───────────────────┐ +│ Result Queue │ Stored in shared memory (~1MB max) +└─────────┬─────────┘ + │ + ▼ (TCP) +┌───────────────────┐ +│ Client │ Deserialize: protobuf bytes → solution_t +└───────────────────┘ +``` + +--- + +## 9. Configuration + +### 9.1 Server Command-Line Options + +``` +Usage: cuopt_remote_server [options] +Options: + -p PORT Port to listen on (default: 9090) + -w WORKERS Number of worker processes (default: 1) + -v Verbose logging + -q Quiet mode (minimal output) + --no-logs Disable log streaming feature +``` + +### 9.2 Environment Variables + +| Variable | Description | +|----------|-------------| +| `CUOPT_REMOTE_USE_SYNC` | Force synchronous mode (bypass job queue) | + +--- + +## 10. File Locations + +| Path | Description | +|------|-------------| +| `/tmp/cuopt_logs/log_{job_id}` | Per-job solver log files | +| `/dev/shm/cuopt_job_queue` | Job queue shared memory | +| `/dev/shm/cuopt_result_queue` | Result queue shared memory | +| `/dev/shm/cuopt_control` | Control block shared memory | + +--- + +## Appendix A: Protocol Buffer Schema + +See `cpp/src/linear_programming/utilities/cuopt_remote.proto` for the complete schema definition. 
+ +Key messages: +- `AsyncRequest`: Wrapper for all request types +- `AsyncResponse`: Wrapper for all response types +- `OptimizationProblem`: LP/MIP problem definition +- `LPSolution` / `MIPSolution`: Solution data +- `PDLPSolverSettings` / `MIPSolverSettings`: Solver configuration + +--- + +## Appendix B: Pluggable Serialization + +The server supports custom serialization formats via a plugin interface: + +```cpp +template +class remote_serializer_t { + // Serialize optimization problem + virtual std::vector serialize_lp_request(...) = 0; + + // Deserialize solution + virtual lp_solution_t deserialize_lp_solution(...) = 0; + + // ... additional methods for async protocol +}; +``` + +Built-in serializers: +- **Protobuf** (default): High performance, schema-based +- **MsgPack**: Lightweight, schema-less alternative + +--- + +*Document Version: 1.0* +*Last Updated: January 2026* From 686f6e43b7c53a978af385c4d6bc7d40b00f1e81 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Thu, 8 Jan 2026 16:14:45 -0500 Subject: [PATCH 17/37] Add cancel_job_remote() API and Python binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C++ API: - Added cancel_job_remote() function to remote_solve.hpp/cu - Added remote_job_status_t enum and cancel_job_result_t struct - Simplified cuopt_cancel_job utility to use the new API (194→105 lines) Python API: - Added cuopt.linear_programming.remote module - cancel_job() function for cancelling remote jobs - JobStatus enum and CancelResult dataclass - Exported from cuopt.linear_programming package All three APIs tested and verified working: - Raw protobuf cancel (baseline) - C++ utility cancel (cuopt_cancel_job) - Python API cancel (cancel_job) --- cpp/cuopt_cancel_job.cpp | 123 ++-------- .../utilities/remote_solve.hpp | 35 +++ .../utilities/remote_solve.cu | 71 ++++++ .../cuopt/linear_programming/__init__.py | 3 +- .../cuopt/cuopt/linear_programming/remote.py | 214 ++++++++++++++++++ 5 files changed, 
340 insertions(+), 106 deletions(-) create mode 100644 python/cuopt/cuopt/linear_programming/remote.py diff --git a/cpp/cuopt_cancel_job.cpp b/cpp/cuopt_cancel_job.cpp index edd786530..229302d17 100644 --- a/cpp/cuopt_cancel_job.cpp +++ b/cpp/cuopt_cancel_job.cpp @@ -15,48 +15,15 @@ * cuopt_cancel_job job_1234567890abcdef -h 192.168.1.100 -p 9090 */ -#include - -#include -#include -#include -#include -#include +#include #include #include #include #include -#include using namespace cuopt::linear_programming; -static bool write_all(int sockfd, const void* data, size_t size) -{ - const uint8_t* ptr = static_cast(data); - size_t remaining = size; - while (remaining > 0) { - ssize_t written = ::write(sockfd, ptr, remaining); - if (written <= 0) return false; - ptr += written; - remaining -= written; - } - return true; -} - -static bool read_all(int sockfd, void* data, size_t size) -{ - uint8_t* ptr = static_cast(data); - size_t remaining = size; - while (remaining > 0) { - ssize_t nread = ::read(sockfd, ptr, remaining); - if (nread <= 0) return false; - ptr += nread; - remaining -= nread; - } - return true; -} - void print_usage(const char* prog) { std::cout << "Usage: " << prog << " [options]\n" @@ -80,6 +47,19 @@ void print_usage(const char* prog) << " " << prog << " job_1234567890abcdef -h 192.168.1.100 -p 9090\n"; } +const char* status_to_string(remote_job_status_t status) +{ + switch (status) { + case remote_job_status_t::QUEUED: return "QUEUED"; + case remote_job_status_t::PROCESSING: return "PROCESSING"; + case remote_job_status_t::COMPLETED: return "COMPLETED"; + case remote_job_status_t::FAILED: return "FAILED"; + case remote_job_status_t::NOT_FOUND: return "NOT_FOUND"; + case remote_job_status_t::CANCELLED: return "CANCELLED"; + default: return "UNKNOWN"; + } +} + int main(int argc, char** argv) { // Parse arguments @@ -112,82 +92,15 @@ int main(int argc, char** argv) return 1; } - // Connect to server - int sockfd = socket(AF_INET, SOCK_STREAM, 0); - if 
(sockfd < 0) { - std::cerr << "Error: Failed to create socket\n"; - return 1; - } - - struct sockaddr_in server_addr; - memset(&server_addr, 0, sizeof(server_addr)); - server_addr.sin_family = AF_INET; - server_addr.sin_port = htons(port); - - if (inet_pton(AF_INET, host.c_str(), &server_addr.sin_addr) <= 0) { - // Try hostname resolution - struct hostent* he = gethostbyname(host.c_str()); - if (he == nullptr) { - std::cerr << "Error: Invalid host: " << host << "\n"; - close(sockfd); - return 1; - } - memcpy(&server_addr.sin_addr, he->h_addr_list[0], he->h_length); - } - - if (connect(sockfd, (struct sockaddr*)&server_addr, sizeof(server_addr)) < 0) { - std::cerr << "Error: Failed to connect to " << host << ":" << port << "\n"; - close(sockfd); - return 1; - } - - // Send cancel request - auto serializer = get_serializer(); - auto request_bytes = serializer->serialize_cancel_request(job_id); - - uint32_t size = request_bytes.size(); - if (!write_all(sockfd, &size, sizeof(size)) || - !write_all(sockfd, request_bytes.data(), request_bytes.size())) { - std::cerr << "Error: Failed to send cancel request\n"; - close(sockfd); - return 1; - } - - // Receive response - if (!read_all(sockfd, &size, sizeof(size))) { - std::cerr << "Error: Failed to receive response size\n"; - close(sockfd); - return 1; - } - - std::vector response_bytes(size); - if (!read_all(sockfd, response_bytes.data(), size)) { - std::cerr << "Error: Failed to receive response\n"; - close(sockfd); - return 1; - } - - close(sockfd); - - // Parse response - auto result = serializer->deserialize_cancel_response(response_bytes); + // Cancel the job using the remote solve API + remote_solve_config_t config{host, port}; + auto result = cancel_job_remote(config, job_id); // Print result std::cout << "Job ID: " << job_id << "\n"; std::cout << "Result: " << (result.success ? 
"SUCCESS" : "FAILED") << "\n"; std::cout << "Message: " << result.message << "\n"; - - const char* status_str = "UNKNOWN"; - using job_status_t = remote_serializer_t::job_status_t; - switch (result.job_status) { - case job_status_t::QUEUED: status_str = "QUEUED"; break; - case job_status_t::PROCESSING: status_str = "PROCESSING"; break; - case job_status_t::COMPLETED: status_str = "COMPLETED"; break; - case job_status_t::FAILED: status_str = "FAILED"; break; - case job_status_t::NOT_FOUND: status_str = "NOT_FOUND"; break; - case job_status_t::CANCELLED: status_str = "CANCELLED"; break; - } - std::cout << "Job Status: " << status_str << "\n"; + std::cout << "Job Status: " << status_to_string(result.job_status) << "\n"; return result.success ? 0 : 1; } diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp index 3d1a1d1ec..898a1cba5 100644 --- a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -92,4 +92,39 @@ mip_solution_t solve_mip_remote( const cuopt::mps_parser::data_model_view_t& view, const mip_solver_settings_t& settings); +/** + * @brief Job status enumeration for remote jobs. + */ +enum class remote_job_status_t { + QUEUED, ///< Job is waiting in queue + PROCESSING, ///< Job is being processed by a worker + COMPLETED, ///< Job completed successfully + FAILED, ///< Job failed with an error + NOT_FOUND, ///< Job ID not found on server + CANCELLED ///< Job was cancelled +}; + +/** + * @brief Result of a cancel job request. + */ +struct cancel_job_result_t { + bool success; ///< True if cancellation was successful + std::string message; ///< Success/error message + remote_job_status_t job_status; ///< Status of job after cancel attempt +}; + +/** + * @brief Cancel a job on a remote server. + * + * This function can cancel jobs that are queued (waiting for a worker) or + * currently running. 
For running jobs, the worker process is killed and + * automatically restarted by the server. + * + * @param config Remote server configuration + * @param job_id The job ID to cancel + * @return Result containing success status, message, and job status + */ +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id); + } // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu index 2f6ca52c8..67479f873 100644 --- a/cpp/src/linear_programming/utilities/remote_solve.cu +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -380,6 +380,56 @@ static void delete_job(const std::string& host, int port, const std::string& job client.receive_response(response_data); // Ignore result } +template +static cancel_job_result_t cancel_job_impl(const std::string& host, + int port, + const std::string& job_id) +{ + cancel_job_result_t result; + result.success = false; + result.message = "Unknown error"; + result.job_status = remote_job_status_t::NOT_FOUND; + + remote_client_t client(host, port); + if (!client.connect()) { + result.message = "Failed to connect to server"; + return result; + } + + auto serializer = get_serializer(); + auto cancel_request = serializer->serialize_cancel_request(job_id); + + if (!client.send_request(cancel_request)) { + result.message = "Failed to send cancel request"; + return result; + } + + std::vector response_data; + if (!client.receive_response(response_data)) { + result.message = "Failed to receive response"; + return result; + } + + // Deserialize the cancel response + auto cancel_result = serializer->deserialize_cancel_response(response_data); + + result.success = cancel_result.success; + result.message = cancel_result.message; + + // Map serializer job_status_t to remote_job_status_t + using serializer_status = typename remote_serializer_t::job_status_t; + switch (cancel_result.job_status) 
{ + case serializer_status::QUEUED: result.job_status = remote_job_status_t::QUEUED; break; + case serializer_status::PROCESSING: result.job_status = remote_job_status_t::PROCESSING; break; + case serializer_status::COMPLETED: result.job_status = remote_job_status_t::COMPLETED; break; + case serializer_status::FAILED: result.job_status = remote_job_status_t::FAILED; break; + case serializer_status::NOT_FOUND: result.job_status = remote_job_status_t::NOT_FOUND; break; + case serializer_status::CANCELLED: result.job_status = remote_job_status_t::CANCELLED; break; + } + + return result; +} + } // namespace //============================================================================ @@ -598,6 +648,27 @@ mip_solution_t solve_mip_remote( } } +//============================================================================ +// Cancel Job Remote +//============================================================================ + +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id) +{ + CUOPT_LOG_INFO("[remote_solve] Cancelling job {} on {}:{}", job_id, config.host, config.port); + + // Use int32_t, double as the type parameters (doesn't affect cancel logic) + auto result = cancel_job_impl(config.host, config.port, job_id); + + if (result.success) { + CUOPT_LOG_INFO("[remote_solve] Job {} cancelled successfully", job_id); + } else { + CUOPT_LOG_WARN("[remote_solve] Failed to cancel job {}: {}", job_id, result.message); + } + + return result; +} + // Explicit instantiations #if CUOPT_INSTANTIATE_FLOAT template optimization_problem_solution_t solve_lp_remote( diff --git a/python/cuopt/cuopt/linear_programming/__init__.py b/python/cuopt/cuopt/linear_programming/__init__.py index d267c2171..c16e7bd7f 100644 --- a/python/cuopt/cuopt/linear_programming/__init__.py +++ b/python/cuopt/cuopt/linear_programming/__init__.py @@ -1,9 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from cuopt.linear_programming import internals from cuopt.linear_programming.data_model import DataModel from cuopt.linear_programming.problem import Problem +from cuopt.linear_programming.remote import CancelResult, JobStatus, cancel_job from cuopt.linear_programming.solution import Solution from cuopt.linear_programming.solver import BatchSolve, Solve from cuopt.linear_programming.solver_settings import ( diff --git a/python/cuopt/cuopt/linear_programming/remote.py b/python/cuopt/cuopt/linear_programming/remote.py new file mode 100644 index 000000000..70a4ae382 --- /dev/null +++ b/python/cuopt/cuopt/linear_programming/remote.py @@ -0,0 +1,214 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Remote solve utilities for cuOpt. + +This module provides functions for interacting with a remote cuopt_remote_server, +including job management operations like cancellation. 
+""" + +import os +import socket +import struct +from dataclasses import dataclass +from enum import IntEnum +from typing import Optional + +# Try to import the protobuf module - may not be available in all environments +try: + import sys + + # Add build directory for development + build_path = os.path.join( + os.path.dirname(__file__), "..", "..", "..", "..", "..", "cpp", "build" + ) + if os.path.exists(build_path): + sys.path.insert(0, os.path.abspath(build_path)) + import cuopt_remote_pb2 as pb + + _HAS_PROTOBUF = True +except ImportError: + _HAS_PROTOBUF = False + + +class JobStatus(IntEnum): + """Status of a remote job.""" + + QUEUED = 0 # Job is waiting in queue + PROCESSING = 1 # Job is being processed by a worker + COMPLETED = 2 # Job completed successfully + FAILED = 3 # Job failed with an error + NOT_FOUND = 4 # Job ID not found on server + CANCELLED = 5 # Job was cancelled + + +@dataclass +class CancelResult: + """Result of a cancel job request.""" + + success: bool + message: str + job_status: JobStatus + + +def get_remote_config() -> Optional[tuple]: + """Get remote server configuration from environment variables. + + Returns + ------- + tuple or None + (host, port) tuple if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are set, + None otherwise. + """ + host = os.environ.get("CUOPT_REMOTE_HOST", "") + port = os.environ.get("CUOPT_REMOTE_PORT", "") + + if host and port: + try: + return (host, int(port)) + except ValueError: + return None + return None + + +def cancel_job( + job_id: str, host: Optional[str] = None, port: Optional[int] = None +) -> CancelResult: + """Cancel a job on a remote cuopt_remote_server. + + This function can cancel jobs that are queued (waiting for a worker) or + currently running. For running jobs, the worker process is killed and + automatically restarted by the server. + + Parameters + ---------- + job_id : str + The job ID to cancel (e.g., "job_1234567890abcdef") + host : str, optional + Server hostname. 
If not provided, uses CUOPT_REMOTE_HOST environment variable. + port : int, optional + Server port. If not provided, uses CUOPT_REMOTE_PORT environment variable. + + Returns + ------- + CancelResult + Result containing success status, message, and job status after cancellation. + + Raises + ------ + RuntimeError + If protobuf module is not available or connection fails. + ValueError + If host/port are not provided and environment variables are not set. + + Examples + -------- + >>> # Using environment variables + >>> import os + >>> os.environ['CUOPT_REMOTE_HOST'] = 'localhost' + >>> os.environ['CUOPT_REMOTE_PORT'] = '9090' + >>> result = cancel_job("job_1234567890abcdef") + >>> print(result.success, result.message) + + >>> # Explicitly specifying host and port + >>> result = cancel_job("job_1234567890abcdef", host="192.168.1.100", port=9090) + """ + if not _HAS_PROTOBUF: + raise RuntimeError( + "Protobuf module not available. Please install protobuf or ensure " + "cuopt_remote_pb2.py is in the Python path." + ) + + # Get host/port from parameters or environment + if host is None or port is None: + config = get_remote_config() + if config is None: + raise ValueError( + "Host and port must be provided or set via CUOPT_REMOTE_HOST " + "and CUOPT_REMOTE_PORT environment variables." 
+ ) + if host is None: + host = config[0] + if port is None: + port = config[1] + + # Create cancel request + request = pb.AsyncRequest() + request.request_type = pb.CANCEL_JOB + request.job_id = job_id + + try: + # Connect to server + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(30.0) # 30 second timeout + sock.connect((host, port)) + + # Send request (length-prefixed) + data = request.SerializeToString() + sock.sendall(struct.pack(" Date: Thu, 8 Jan 2026 16:26:53 -0500 Subject: [PATCH 18/37] Implement unified sync/async architecture Server-side: - All jobs (blocking and non-blocking) now go through the shared memory job queue and are processed by worker processes - For blocking=true requests, server submits to queue then waits on condition variable until completion, then auto-deletes and returns - This enables cancellation for 'sync' jobs and allows concurrent solves - Single code path for all job processing Client-side: - Updated solve_lp_remote and solve_mip_remote to use the unified response format (result_response) for blocking mode - Removed streaming log dependency for blocking mode Benefits: - Cancellation works for all jobs (blocking and non-blocking) - Multiple concurrent 'sync' solves possible (one per worker) - Simpler code with single processing path - Worker isolation (one job failure doesn't crash server) --- cpp/cuopt_remote_server.cpp | 49 ++++++++++++++++--- .../utilities/remote_solve.cu | 37 +++++++++----- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index b19c51580..d75a7a3e3 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -1292,15 +1292,48 @@ void handle_client(int client_fd, bool stream_logs) // Extract the actual problem data from the async request std::vector problem_data = serializer->extract_problem_data(request_data); - if (blocking) { - // Sync mode - handle with log streaming - 
handle_sync_solve(client_fd, problem_data, is_mip, stream_logs); - return; - } else { - // Async mode - submit and return job_id - auto [success, result] = submit_job_async(problem_data, is_mip); - auto response = serializer->serialize_submit_response(success, result); + // UNIFIED ARCHITECTURE: All jobs go through the queue + // Submit job to queue (same for both sync and async) + auto [submit_ok, job_id_or_error] = submit_job_async(problem_data, is_mip); + + if (!submit_ok) { + // Submission failed + auto response = serializer->serialize_submit_response(false, job_id_or_error); + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + } else if (blocking) { + // BLOCKING MODE: Wait for result using condition variable (no polling) + // This unifies sync/async - job goes through queue but we wait here + std::string job_id = job_id_or_error; + + if (config.verbose) { + std::cout << "[Server] Blocking request, job_id: " << job_id + << " (waiting for completion)\n"; + } + + std::vector result_data; + std::string error_message; + + // Block on condition variable until job completes + bool success = wait_for_result(job_id, result_data, error_message); + + // Auto-delete job after returning result (client doesn't need to call DELETE) + delete_job(job_id); + + // Return result response (same format as GET_RESULT) + auto response = serializer->serialize_result_response(success, result_data, error_message); + uint32_t size = response.size(); + write_all(client_fd, &size, sizeof(size)); + write_all(client_fd, response.data(), response.size()); + if (config.verbose) { + std::cout << "[Server] Blocking request completed: " << job_id << ", success=" << success + << "\n"; + } + } else { + // ASYNC MODE: Return job_id immediately + auto response = serializer->serialize_submit_response(true, job_id_or_error); uint32_t size = response.size(); write_all(client_fd, &size, sizeof(size)); 
write_all(client_fd, response.data(), response.size()); diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu index 67479f873..42cc0be61 100644 --- a/cpp/src/linear_programming/utilities/remote_solve.cu +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -466,13 +466,18 @@ optimization_problem_solution_t solve_lp_remote( if (sync_mode) { //========================================================================= - // SYNC MODE: Use blocking request with streaming logs + // SYNC/BLOCKING MODE: Unified architecture + // + // Server-side: Job goes through queue, handled by worker process. + // Client blocks until completion (server uses condition variable). + // This enables cancellation for "sync" jobs and concurrent solves. //========================================================================= // Serialize as async request with blocking=true std::vector request_data = serializer->serialize_async_lp_request(view, settings, true /* blocking */); - CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (sync): {} bytes", request_data.size()); + CUOPT_LOG_DEBUG("[remote_solve] Serialized LP request (blocking): {} bytes", + request_data.size()); // Connect and send remote_client_t client(config.host, config.port); @@ -486,17 +491,17 @@ optimization_problem_solution_t solve_lp_remote( "Failed to send request to remote server", cuopt::error_type_t::RuntimeError)); } - // Receive response with streaming log support + // Receive response (server blocks until job completes, then returns result) std::vector response_data; - if (!client.receive_streaming_response(response_data, settings.log_to_console)) { + if (!client.receive_response(response_data)) { return optimization_problem_solution_t(cuopt::logic_error( "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); } - CUOPT_LOG_DEBUG("[remote_solve] Received LP solution (sync): {} bytes", response_data.size()); + 
CUOPT_LOG_DEBUG("[remote_solve] Received LP result (blocking): {} bytes", response_data.size()); - // Deserialize solution - return serializer->deserialize_lp_solution(response_data); + // Deserialize solution from result response (same format as async GET_RESULT) + return serializer->deserialize_lp_result_response(response_data); } else { //========================================================================= @@ -577,12 +582,17 @@ mip_solution_t solve_mip_remote( if (sync_mode) { //========================================================================= - // SYNC MODE: Use blocking request with streaming logs + // SYNC/BLOCKING MODE: Unified architecture + // + // Server-side: Job goes through queue, handled by worker process. + // Client blocks until completion (server uses condition variable). + // This enables cancellation for "sync" jobs and concurrent solves. //========================================================================= std::vector request_data = serializer->serialize_async_mip_request(view, settings, true /* blocking */); - CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (sync): {} bytes", request_data.size()); + CUOPT_LOG_DEBUG("[remote_solve] Serialized MIP request (blocking): {} bytes", + request_data.size()); remote_client_t client(config.host, config.port); if (!client.connect()) { @@ -595,15 +605,18 @@ mip_solution_t solve_mip_remote( cuopt::error_type_t::RuntimeError)); } + // Receive response (server blocks until job completes, then returns result) std::vector response_data; - if (!client.receive_streaming_response(response_data, true /* log_to_console */)) { + if (!client.receive_response(response_data)) { return mip_solution_t(cuopt::logic_error( "Failed to receive response from remote server", cuopt::error_type_t::RuntimeError)); } - CUOPT_LOG_DEBUG("[remote_solve] Received MIP solution (sync): {} bytes", response_data.size()); + CUOPT_LOG_DEBUG("[remote_solve] Received MIP result (blocking): {} bytes", + 
response_data.size()); - return serializer->deserialize_mip_solution(response_data); + // Deserialize solution from result response (same format as async GET_RESULT) + return serializer->deserialize_mip_result_response(response_data); } else { //========================================================================= From 6e19947c5b035a1a198a1515345f07f581816fef Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 10:24:57 -0500 Subject: [PATCH 19/37] Align serialization field names with data_model_view_t - Rename protobuf OptimizationProblem fields to match data_model_view_t: * constraint_matrix_values -> A_values (matches A_) * constraint_matrix_indices -> A_indices (matches A_indices_) * constraint_matrix_offsets -> A_offsets (matches A_offsets_) * objective_coefficients -> c (matches c_) * constraint_bounds -> b (matches b_) * is_integer/is_binary -> variable_types (bytes, matches variable_types_) - Add missing fields to protobuf and msgpack serialization: * initial_primal_solution - for warm start initial primal values * initial_dual_solution - for warm start initial dual values * Q_values, Q_indices, Q_offsets - quadratic objective matrix for QPS - Update msgpack serializer to use identical field names as protobuf - Update protobuf_serializer.cu serialization/deserialization - Both serializers now handle all data_model_view_t fields --- .../utilities/cuopt_remote.proto | 127 +++- .../utilities/protobuf_serializer.cu | 378 ++++++++--- .../serializers/msgpack_serializer.cpp | 600 +++++++++++++++--- 3 files changed, 873 insertions(+), 232 deletions(-) diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto index 8f9af6bda..5b64e5055 100644 --- a/cpp/src/linear_programming/utilities/cuopt_remote.proto +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -28,7 +28,7 @@ enum FloatType { DOUBLE = 1; } -// Optimization problem representation (matches 
optimization_problem_t) +// Optimization problem representation (field names match data_model_view_t) message OptimizationProblem { // Problem metadata string problem_name = 1; @@ -38,29 +38,37 @@ message OptimizationProblem { double objective_offset = 5; ProblemCategory problem_category = 6; - // Variable names (optional, can be large) + // Variable and row names (optional) repeated string variable_names = 7; repeated string row_names = 8; - // Constraint matrix in CSR format - repeated double constraint_matrix_values = 10; - repeated int32 constraint_matrix_indices = 11; - repeated int32 constraint_matrix_offsets = 12; + // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) + repeated double A_values = 10; + repeated int32 A_indices = 11; + repeated int32 A_offsets = 12; - // Problem vectors - repeated double objective_coefficients = 20; - repeated double constraint_bounds = 21; + // Problem vectors (names match data_model_view_t: c_, b_) + repeated double c = 20; // objective coefficients + repeated double b = 21; // constraint bounds (RHS) repeated double variable_lower_bounds = 22; repeated double variable_upper_bounds = 23; - // Constraint bounds (two representations supported by cuOpt) + // Constraint bounds (alternative to b + row_types) repeated double constraint_lower_bounds = 24; repeated double constraint_upper_bounds = 25; - bytes row_types = 26; // char array of constraint types ('<', '>', '=') + bytes row_types = 26; // char array: 'E' (=), 'L' (<=), 'G' (>=), 'N' (objective) - // Integer/binary variable indicators (for MIP) - repeated bool is_integer = 30; - repeated bool is_binary = 31; + // Variable types (matches data_model_view_t: variable_types_) + bytes variable_types = 30; // char array: 'C' (continuous), 'I' (integer), 'B' (binary) + + // Initial solutions (matches data_model_view_t) + repeated double initial_primal_solution = 40; + repeated double initial_dual_solution = 41; + + // Quadratic 
objective matrix Q in CSR format for QPS (matches data_model_view_t: Q_objective_) + repeated double Q_values = 50; + repeated int32 Q_indices = 51; + repeated int32 Q_offsets = 52; } enum ProblemCategory { @@ -69,23 +77,64 @@ enum ProblemCategory { IP_PROBLEM = 2; } -// PDLP solver settings +// PDLP solver mode enum (matches cuOpt pdlp_solver_mode_t) +enum PDLPSolverMode { + PDLP_MODE_STABLE1 = 0; + PDLP_MODE_STABLE2 = 1; + PDLP_MODE_METHODICAL1 = 2; + PDLP_MODE_FAST1 = 3; + PDLP_MODE_STABLE3 = 4; +} + +// LP solver method enum (matches cuOpt method_t) +enum LPMethod { + METHOD_CONCURRENT = 0; + METHOD_PDLP = 1; + METHOD_DUAL_SIMPLEX = 2; + METHOD_BARRIER = 3; +} + +// PDLP solver settings (field names match cuOpt Python/C++ API) message PDLPSolverSettings { - // Termination criteria - double eps_optimal_absolute = 1; - double eps_optimal_relative = 2; - double eps_primal_infeasible = 3; - double eps_dual_infeasible = 4; - int32 time_sec_limit = 5; - int32 iteration_limit = 6; - - // Algorithm parameters - double initial_primal_weight = 10; - double initial_step_size = 11; - int32 verbosity = 12; + // Termination tolerances + double absolute_gap_tolerance = 1; + double relative_gap_tolerance = 2; + double primal_infeasible_tolerance = 3; + double dual_infeasible_tolerance = 4; + double absolute_dual_tolerance = 5; + double relative_dual_tolerance = 6; + double absolute_primal_tolerance = 7; + double relative_primal_tolerance = 8; + + // Limits + double time_limit = 10; + int64 iteration_limit = 11; + + // Solver configuration + bool log_to_console = 20; + bool detect_infeasibility = 21; + bool strict_infeasibility = 22; + PDLPSolverMode pdlp_solver_mode = 23; + LPMethod method = 24; + bool presolve = 25; + bool dual_postsolve = 26; + bool crossover = 27; + int32 num_gpus = 28; + + // Advanced options + bool per_constraint_residual = 30; + bool cudss_deterministic = 31; + int32 folding = 32; + int32 augmented = 33; + int32 dualize = 34; + int32 ordering = 35; + 
int32 barrier_dual_initial_point = 36; + bool eliminate_dense_columns = 37; + bool save_best_primal_so_far = 38; + bool first_primal_feasible = 39; // Warm start data (if provided) - PDLPWarmStartData warm_start_data = 20; + PDLPWarmStartData warm_start_data = 50; } message PDLPWarmStartData { @@ -109,12 +158,26 @@ message PDLPWarmStartData { int32 iterations_since_last_restart = 17; } -// MIP solver settings +// MIP solver settings (field names match cuOpt Python/C++ API) message MIPSolverSettings { + // Limits double time_limit = 1; - double mip_gap = 2; - int32 verbosity = 3; - // Additional MIP settings can be added here + + // Tolerances + double relative_mip_gap = 2; + double absolute_mip_gap = 3; + double integrality_tolerance = 4; + double absolute_tolerance = 5; + double relative_tolerance = 6; + double presolve_absolute_tolerance = 7; + + // Solver configuration + bool log_to_console = 10; + bool heuristics_only = 11; + int32 num_cpu_threads = 12; + int32 num_gpus = 13; + bool presolve = 14; + bool mip_scaling = 15; } // LP solve request diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 685f5cade..70f591f8b 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -136,12 +136,21 @@ class protobuf_serializer_t : public remote_serializer_t { // Serialize problem data serialize_problem_to_proto(view, request.mutable_problem()); - // Serialize settings + // Serialize all MIP settings (names match cuOpt API) auto* pb_settings = request.mutable_settings(); pb_settings->set_time_limit(settings.time_limit); - pb_settings->set_mip_gap(settings.tolerances.relative_mip_gap); - // Note: verbosity not directly available in mip_solver_settings_t - pb_settings->set_verbosity(0); + pb_settings->set_relative_mip_gap(settings.tolerances.relative_mip_gap); + 
pb_settings->set_absolute_mip_gap(settings.tolerances.absolute_mip_gap); + pb_settings->set_integrality_tolerance(settings.tolerances.integrality_tolerance); + pb_settings->set_absolute_tolerance(settings.tolerances.absolute_tolerance); + pb_settings->set_relative_tolerance(settings.tolerances.relative_tolerance); + pb_settings->set_presolve_absolute_tolerance(settings.tolerances.presolve_absolute_tolerance); + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_heuristics_only(settings.heuristics_only); + pb_settings->set_num_cpu_threads(settings.num_cpu_threads); + pb_settings->set_num_gpus(settings.num_gpus); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_mip_scaling(settings.mip_scaling); // Serialize to bytes std::vector result(request.ByteSizeLong()); @@ -326,10 +335,21 @@ class protobuf_serializer_t : public remote_serializer_t { serialize_problem_to_proto(view, mip_request->mutable_problem()); + // Serialize all MIP settings (names match cuOpt API) auto* pb_settings = mip_request->mutable_settings(); pb_settings->set_time_limit(settings.time_limit); - pb_settings->set_mip_gap(settings.tolerances.relative_mip_gap); - pb_settings->set_verbosity(0); + pb_settings->set_relative_mip_gap(settings.tolerances.relative_mip_gap); + pb_settings->set_absolute_mip_gap(settings.tolerances.absolute_mip_gap); + pb_settings->set_integrality_tolerance(settings.tolerances.integrality_tolerance); + pb_settings->set_absolute_tolerance(settings.tolerances.absolute_tolerance); + pb_settings->set_relative_tolerance(settings.tolerances.relative_tolerance); + pb_settings->set_presolve_absolute_tolerance(settings.tolerances.presolve_absolute_tolerance); + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_heuristics_only(settings.heuristics_only); + pb_settings->set_num_cpu_threads(settings.num_cpu_threads); + pb_settings->set_num_gpus(settings.num_gpus); + pb_settings->set_presolve(settings.presolve); + 
pb_settings->set_mip_scaling(settings.mip_scaling); std::vector result(request.ByteSizeLong()); request.SerializeToArray(result.data(), result.size()); @@ -804,22 +824,23 @@ class protobuf_serializer_t : public remote_serializer_t { const i_t* indices_ptr = indices_span.data(); const i_t* offsets_ptr = offsets_span.data(); + // Constraint matrix A in CSR format (field names match data_model_view_t) for (i_t i = 0; i < nnz; ++i) { - pb_problem->add_constraint_matrix_values(static_cast(values_ptr[i])); + pb_problem->add_a_values(static_cast(values_ptr[i])); } for (i_t i = 0; i < nnz; ++i) { - pb_problem->add_constraint_matrix_indices(static_cast(indices_ptr[i])); + pb_problem->add_a_indices(static_cast(indices_ptr[i])); } for (i_t i = 0; i <= n_rows; ++i) { - pb_problem->add_constraint_matrix_offsets(static_cast(offsets_ptr[i])); + pb_problem->add_a_offsets(static_cast(offsets_ptr[i])); } - // Objective coefficients + // Objective coefficients c (field name matches data_model_view_t: c_) auto obj_span = view.get_objective_coefficients(); auto n_cols = static_cast(obj_span.size()); const f_t* obj_ptr = obj_span.data(); for (i_t i = 0; i < n_cols; ++i) { - pb_problem->add_objective_coefficients(static_cast(obj_ptr[i])); + pb_problem->add_c(static_cast(obj_ptr[i])); } // Variable bounds @@ -900,51 +921,130 @@ class protobuf_serializer_t : public remote_serializer_t { pb_problem->add_row_names(name); } - // Variable types (for MIP problems) + // Variable types (for MIP problems) - stored as bytes to match data_model_view_t auto var_types_span = view.get_variable_types(); if (var_types_span.size() > 0) { - const char* var_types_ptr = var_types_span.data(); - for (i_t i = 0; i < n_cols; ++i) { - char vtype = var_types_ptr[i]; - // 'I' = integer, 'B' = binary, 'C' = continuous (default) - pb_problem->add_is_integer(vtype == 'I' || vtype == 'B'); - pb_problem->add_is_binary(vtype == 'B'); + pb_problem->set_variable_types(std::string(var_types_span.data(), 
var_types_span.size())); + } + + // Row types - store directly as bytes + auto row_types_span = view.get_row_types(); + if (row_types_span.size() > 0) { + pb_problem->set_row_types(std::string(row_types_span.data(), row_types_span.size())); + } + + // Constraint bounds b (RHS) - store directly if available + auto b_span = view.get_constraint_bounds(); + if (b_span.size() > 0) { + const f_t* b_ptr = b_span.data(); + for (size_t i = 0; i < b_span.size(); ++i) { + pb_problem->add_b(static_cast(b_ptr[i])); + } + } + + // Initial solutions (if available) + auto init_primal_span = view.get_initial_primal_solution(); + if (init_primal_span.size() > 0) { + const f_t* init_primal_ptr = init_primal_span.data(); + for (size_t i = 0; i < init_primal_span.size(); ++i) { + pb_problem->add_initial_primal_solution(static_cast(init_primal_ptr[i])); + } + } + + auto init_dual_span = view.get_initial_dual_solution(); + if (init_dual_span.size() > 0) { + const f_t* init_dual_ptr = init_dual_span.data(); + for (size_t i = 0; i < init_dual_span.size(); ++i) { + pb_problem->add_initial_dual_solution(static_cast(init_dual_ptr[i])); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (view.has_quadratic_objective()) { + auto q_values_span = view.get_quadratic_objective_values(); + auto q_indices_span = view.get_quadratic_objective_indices(); + auto q_offsets_span = view.get_quadratic_objective_offsets(); + + const f_t* q_values_ptr = q_values_span.data(); + const i_t* q_indices_ptr = q_indices_span.data(); + const i_t* q_offsets_ptr = q_offsets_span.data(); + + for (size_t i = 0; i < q_values_span.size(); ++i) { + pb_problem->add_q_values(static_cast(q_values_ptr[i])); + } + for (size_t i = 0; i < q_indices_span.size(); ++i) { + pb_problem->add_q_indices(static_cast(q_indices_ptr[i])); + } + for (size_t i = 0; i < q_offsets_span.size(); ++i) { + pb_problem->add_q_offsets(static_cast(q_offsets_ptr[i])); } } } - void serialize_lp_settings_to_proto(const 
pdlp_solver_settings_t& settings, - cuopt::remote::PDLPSolverSettings* pb_settings) + // Convert cuOpt pdlp_solver_mode_t to protobuf enum + cuopt::remote::PDLPSolverMode to_proto_pdlp_mode(pdlp_solver_mode_t mode) { - // Map from cuOpt tolerances to protobuf settings - pb_settings->set_eps_optimal_absolute(settings.tolerances.absolute_gap_tolerance); - pb_settings->set_eps_optimal_relative(settings.tolerances.relative_gap_tolerance); - pb_settings->set_eps_primal_infeasible(settings.tolerances.primal_infeasible_tolerance); - pb_settings->set_eps_dual_infeasible(settings.tolerances.dual_infeasible_tolerance); - - // Handle infinity time_limit: use -1 to signal "no limit" in the protobuf - // This avoids undefined behavior when casting infinity to int32_t - int32_t time_limit_sec = -1; // -1 means no limit - if (std::isfinite(settings.time_limit) && settings.time_limit > 0) { - time_limit_sec = static_cast( - std::min(settings.time_limit, static_cast(std::numeric_limits::max()))); + switch (mode) { + case pdlp_solver_mode_t::Stable1: return cuopt::remote::PDLP_MODE_STABLE1; + case pdlp_solver_mode_t::Stable2: return cuopt::remote::PDLP_MODE_STABLE2; + case pdlp_solver_mode_t::Methodical1: return cuopt::remote::PDLP_MODE_METHODICAL1; + case pdlp_solver_mode_t::Fast1: return cuopt::remote::PDLP_MODE_FAST1; + case pdlp_solver_mode_t::Stable3: return cuopt::remote::PDLP_MODE_STABLE3; + default: return cuopt::remote::PDLP_MODE_STABLE3; } - pb_settings->set_time_sec_limit(time_limit_sec); - - // Handle max iteration limit similarly - int32_t iter_limit = -1; // -1 means no limit - if (settings.iteration_limit > 0 && - settings.iteration_limit < std::numeric_limits::max()) { - iter_limit = - static_cast(std::min(static_cast(settings.iteration_limit), - static_cast(std::numeric_limits::max()))); + } + + // Convert cuOpt method_t to protobuf enum + cuopt::remote::LPMethod to_proto_method(method_t method) + { + switch (method) { + case method_t::Concurrent: return 
cuopt::remote::METHOD_CONCURRENT; + case method_t::PDLP: return cuopt::remote::METHOD_PDLP; + case method_t::DualSimplex: return cuopt::remote::METHOD_DUAL_SIMPLEX; + case method_t::Barrier: return cuopt::remote::METHOD_BARRIER; + default: return cuopt::remote::METHOD_CONCURRENT; } - pb_settings->set_iteration_limit(iter_limit); + } - // initial_primal_weight and initial_step_size are not directly accessible, use defaults - pb_settings->set_initial_primal_weight(1.0); - pb_settings->set_initial_step_size(1.0); - pb_settings->set_verbosity(settings.log_to_console ? 1 : 0); + void serialize_lp_settings_to_proto(const pdlp_solver_settings_t& settings, + cuopt::remote::PDLPSolverSettings* pb_settings) + { + // Termination tolerances (all names match cuOpt API) + pb_settings->set_absolute_gap_tolerance(settings.tolerances.absolute_gap_tolerance); + pb_settings->set_relative_gap_tolerance(settings.tolerances.relative_gap_tolerance); + pb_settings->set_primal_infeasible_tolerance(settings.tolerances.primal_infeasible_tolerance); + pb_settings->set_dual_infeasible_tolerance(settings.tolerances.dual_infeasible_tolerance); + pb_settings->set_absolute_dual_tolerance(settings.tolerances.absolute_dual_tolerance); + pb_settings->set_relative_dual_tolerance(settings.tolerances.relative_dual_tolerance); + pb_settings->set_absolute_primal_tolerance(settings.tolerances.absolute_primal_tolerance); + pb_settings->set_relative_primal_tolerance(settings.tolerances.relative_primal_tolerance); + + // Limits + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_iteration_limit(static_cast(settings.iteration_limit)); + + // Solver configuration + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_detect_infeasibility(settings.detect_infeasibility); + pb_settings->set_strict_infeasibility(settings.strict_infeasibility); + pb_settings->set_pdlp_solver_mode(to_proto_pdlp_mode(settings.pdlp_solver_mode)); + 
pb_settings->set_method(to_proto_method(settings.method)); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_dual_postsolve(settings.dual_postsolve); + pb_settings->set_crossover(settings.crossover); + pb_settings->set_num_gpus(settings.num_gpus); + + // Advanced options + pb_settings->set_per_constraint_residual(settings.per_constraint_residual); + pb_settings->set_cudss_deterministic(settings.cudss_deterministic); + pb_settings->set_folding(settings.folding); + pb_settings->set_augmented(settings.augmented); + pb_settings->set_dualize(settings.dualize); + pb_settings->set_ordering(settings.ordering); + pb_settings->set_barrier_dual_initial_point(settings.barrier_dual_initial_point); + pb_settings->set_eliminate_dense_columns(settings.eliminate_dense_columns); + pb_settings->set_save_best_primal_so_far(settings.save_best_primal_so_far); + pb_settings->set_first_primal_feasible(settings.first_primal_feasible); } //============================================================================ @@ -960,13 +1060,10 @@ class protobuf_serializer_t : public remote_serializer_t { mps_data.set_objective_scaling_factor(pb_problem.objective_scaling_factor()); mps_data.set_objective_offset(pb_problem.objective_offset()); - // Copy constraint matrix - mps_data_model_t copies the data - std::vector values(pb_problem.constraint_matrix_values().begin(), - pb_problem.constraint_matrix_values().end()); - std::vector indices(pb_problem.constraint_matrix_indices().begin(), - pb_problem.constraint_matrix_indices().end()); - std::vector offsets(pb_problem.constraint_matrix_offsets().begin(), - pb_problem.constraint_matrix_offsets().end()); + // Constraint matrix A in CSR format (field names match data_model_view_t) + std::vector values(pb_problem.a_values().begin(), pb_problem.a_values().end()); + std::vector indices(pb_problem.a_indices().begin(), pb_problem.a_indices().end()); + std::vector offsets(pb_problem.a_offsets().begin(), pb_problem.a_offsets().end()); 
mps_data.set_csr_constraint_matrix(values.data(), static_cast(values.size()), @@ -975,9 +1072,8 @@ class protobuf_serializer_t : public remote_serializer_t { offsets.data(), static_cast(offsets.size())); - // Objective coefficients - std::vector obj(pb_problem.objective_coefficients().begin(), - pb_problem.objective_coefficients().end()); + // Objective coefficients c + std::vector obj(pb_problem.c().begin(), pb_problem.c().end()); mps_data.set_objective_coefficients(obj.data(), static_cast(obj.size())); // Variable bounds @@ -988,13 +1084,24 @@ class protobuf_serializer_t : public remote_serializer_t { mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); - // Constraint bounds - std::vector con_lb(pb_problem.constraint_lower_bounds().begin(), - pb_problem.constraint_lower_bounds().end()); - std::vector con_ub(pb_problem.constraint_upper_bounds().begin(), - pb_problem.constraint_upper_bounds().end()); - mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); - mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + // Constraint bounds (prefer lower/upper bounds if available) + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector con_lb(pb_problem.constraint_lower_bounds().begin(), + pb_problem.constraint_lower_bounds().end()); + std::vector con_ub(pb_problem.constraint_upper_bounds().begin(), + pb_problem.constraint_upper_bounds().end()); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + } else if (pb_problem.b_size() > 0) { + // Use b (RHS) + row_types format + std::vector b(pb_problem.b().begin(), pb_problem.b().end()); + mps_data.set_constraint_bounds(b.data(), static_cast(b.size())); + + if (!pb_problem.row_types().empty()) { + const std::string& row_types = 
pb_problem.row_types(); + mps_data.set_row_types(row_types.data(), static_cast(row_types.size())); + } + } // Variable names if (pb_problem.variable_names_size() > 0) { @@ -1010,56 +1117,129 @@ class protobuf_serializer_t : public remote_serializer_t { mps_data.set_row_names(row_names); } - // Variable types (for MIP problems) - // is_integer and is_binary arrays indicate variable types - if (pb_problem.is_integer_size() > 0) { - i_t n_vars = static_cast(pb_problem.is_integer_size()); - std::vector var_types(n_vars, 'C'); // Default to continuous - for (i_t i = 0; i < n_vars; ++i) { - if (pb_problem.is_binary_size() > i && pb_problem.is_binary(i)) { - var_types[i] = 'B'; // Binary - } else if (pb_problem.is_integer(i)) { - var_types[i] = 'I'; // Integer - } - } + // Variable types (stored as bytes, matching data_model_view_t) + if (!pb_problem.variable_types().empty()) { + const std::string& var_types_str = pb_problem.variable_types(); + std::vector var_types(var_types_str.begin(), var_types_str.end()); mps_data.set_variable_types(var_types); } + + // Initial solutions (if provided) + if (pb_problem.initial_primal_solution_size() > 0) { + std::vector init_primal(pb_problem.initial_primal_solution().begin(), + pb_problem.initial_primal_solution().end()); + mps_data.set_initial_primal_solution(init_primal.data(), + static_cast(init_primal.size())); + } + + if (pb_problem.initial_dual_solution_size() > 0) { + std::vector init_dual(pb_problem.initial_dual_solution().begin(), + pb_problem.initial_dual_solution().end()); + mps_data.set_initial_dual_solution(init_dual.data(), static_cast(init_dual.size())); + } + + // Quadratic objective matrix Q (for QPS problems) + if (pb_problem.q_values_size() > 0) { + std::vector q_values(pb_problem.q_values().begin(), pb_problem.q_values().end()); + std::vector q_indices(pb_problem.q_indices().begin(), pb_problem.q_indices().end()); + std::vector q_offsets(pb_problem.q_offsets().begin(), pb_problem.q_offsets().end()); + + 
mps_data.set_quadratic_objective_matrix(q_values.data(), + static_cast(q_values.size()), + q_indices.data(), + static_cast(q_indices.size()), + q_offsets.data(), + static_cast(q_offsets.size())); + } } - void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, - pdlp_solver_settings_t& settings) + // Convert protobuf enum to cuOpt pdlp_solver_mode_t + pdlp_solver_mode_t from_proto_pdlp_mode(cuopt::remote::PDLPSolverMode mode) { - // Map from protobuf settings to cuOpt tolerances - settings.tolerances.absolute_gap_tolerance = pb_settings.eps_optimal_absolute(); - settings.tolerances.relative_gap_tolerance = pb_settings.eps_optimal_relative(); - settings.tolerances.primal_infeasible_tolerance = pb_settings.eps_primal_infeasible(); - settings.tolerances.dual_infeasible_tolerance = pb_settings.eps_dual_infeasible(); - - // Handle time limit: -1 or 0 means no limit (infinity) - int32_t time_limit_sec = pb_settings.time_sec_limit(); - if (time_limit_sec <= 0) { - settings.time_limit = std::numeric_limits::infinity(); - } else { - settings.time_limit = static_cast(time_limit_sec); + switch (mode) { + case cuopt::remote::PDLP_MODE_STABLE1: return pdlp_solver_mode_t::Stable1; + case cuopt::remote::PDLP_MODE_STABLE2: return pdlp_solver_mode_t::Stable2; + case cuopt::remote::PDLP_MODE_METHODICAL1: return pdlp_solver_mode_t::Methodical1; + case cuopt::remote::PDLP_MODE_FAST1: return pdlp_solver_mode_t::Fast1; + case cuopt::remote::PDLP_MODE_STABLE3: return pdlp_solver_mode_t::Stable3; + default: return pdlp_solver_mode_t::Stable3; } + } - // Handle iteration limit: -1 or 0 means no limit - int32_t iter_limit = pb_settings.iteration_limit(); - if (iter_limit <= 0) { - settings.iteration_limit = std::numeric_limits::max(); - } else { - settings.iteration_limit = static_cast(iter_limit); + // Convert protobuf enum to cuOpt method_t + method_t from_proto_method(cuopt::remote::LPMethod method) + { + switch (method) { + case cuopt::remote::METHOD_CONCURRENT: 
return method_t::Concurrent; + case cuopt::remote::METHOD_PDLP: return method_t::PDLP; + case cuopt::remote::METHOD_DUAL_SIMPLEX: return method_t::DualSimplex; + case cuopt::remote::METHOD_BARRIER: return method_t::Barrier; + default: return method_t::Concurrent; } + } - settings.log_to_console = pb_settings.verbosity() > 0; + void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, + pdlp_solver_settings_t& settings) + { + // Termination tolerances (all names match cuOpt API) + settings.tolerances.absolute_gap_tolerance = pb_settings.absolute_gap_tolerance(); + settings.tolerances.relative_gap_tolerance = pb_settings.relative_gap_tolerance(); + settings.tolerances.primal_infeasible_tolerance = pb_settings.primal_infeasible_tolerance(); + settings.tolerances.dual_infeasible_tolerance = pb_settings.dual_infeasible_tolerance(); + settings.tolerances.absolute_dual_tolerance = pb_settings.absolute_dual_tolerance(); + settings.tolerances.relative_dual_tolerance = pb_settings.relative_dual_tolerance(); + settings.tolerances.absolute_primal_tolerance = pb_settings.absolute_primal_tolerance(); + settings.tolerances.relative_primal_tolerance = pb_settings.relative_primal_tolerance(); + + // Limits + settings.time_limit = pb_settings.time_limit(); + settings.iteration_limit = static_cast(pb_settings.iteration_limit()); + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.detect_infeasibility = pb_settings.detect_infeasibility(); + settings.strict_infeasibility = pb_settings.strict_infeasibility(); + settings.pdlp_solver_mode = from_proto_pdlp_mode(pb_settings.pdlp_solver_mode()); + settings.method = from_proto_method(pb_settings.method()); + settings.presolve = pb_settings.presolve(); + settings.dual_postsolve = pb_settings.dual_postsolve(); + settings.crossover = pb_settings.crossover(); + settings.num_gpus = pb_settings.num_gpus(); + + // Advanced options + settings.per_constraint_residual = 
pb_settings.per_constraint_residual(); + settings.cudss_deterministic = pb_settings.cudss_deterministic(); + settings.folding = pb_settings.folding(); + settings.augmented = pb_settings.augmented(); + settings.dualize = pb_settings.dualize(); + settings.ordering = pb_settings.ordering(); + settings.barrier_dual_initial_point = pb_settings.barrier_dual_initial_point(); + settings.eliminate_dense_columns = pb_settings.eliminate_dense_columns(); + settings.save_best_primal_so_far = pb_settings.save_best_primal_so_far(); + settings.first_primal_feasible = pb_settings.first_primal_feasible(); } void proto_to_mip_settings(const cuopt::remote::MIPSolverSettings& pb_settings, mip_solver_settings_t& settings) { - settings.time_limit = pb_settings.time_limit(); - settings.tolerances.relative_mip_gap = pb_settings.mip_gap(); - // Note: verbosity not directly supported in mip_solver_settings_t + // Limits + settings.time_limit = pb_settings.time_limit(); + + // Tolerances (all names match cuOpt API) + settings.tolerances.relative_mip_gap = pb_settings.relative_mip_gap(); + settings.tolerances.absolute_mip_gap = pb_settings.absolute_mip_gap(); + settings.tolerances.integrality_tolerance = pb_settings.integrality_tolerance(); + settings.tolerances.absolute_tolerance = pb_settings.absolute_tolerance(); + settings.tolerances.relative_tolerance = pb_settings.relative_tolerance(); + settings.tolerances.presolve_absolute_tolerance = pb_settings.presolve_absolute_tolerance(); + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.heuristics_only = pb_settings.heuristics_only(); + settings.num_cpu_threads = pb_settings.num_cpu_threads(); + settings.num_gpus = pb_settings.num_gpus(); + settings.presolve = pb_settings.presolve(); + settings.mip_scaling = pb_settings.mip_scaling(); } //============================================================================ diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp 
b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index 02a4c649d..8821c8df9 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -68,16 +68,70 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack_uint32(protocol_version()); pack_problem(pk, view); - // Pack LP settings - pk.pack_map(4); + // Pack all LP settings (field names match cuOpt API) + pk.pack_map(28); + // Termination tolerances + pk.pack("absolute_gap_tolerance"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("relative_gap_tolerance"); + pk.pack(settings.tolerances.relative_gap_tolerance); + pk.pack("primal_infeasible_tolerance"); + pk.pack(settings.tolerances.primal_infeasible_tolerance); + pk.pack("dual_infeasible_tolerance"); + pk.pack(settings.tolerances.dual_infeasible_tolerance); + pk.pack("absolute_dual_tolerance"); + pk.pack(settings.tolerances.absolute_dual_tolerance); + pk.pack("relative_dual_tolerance"); + pk.pack(settings.tolerances.relative_dual_tolerance); + pk.pack("absolute_primal_tolerance"); + pk.pack(settings.tolerances.absolute_primal_tolerance); + pk.pack("relative_primal_tolerance"); + pk.pack(settings.tolerances.relative_primal_tolerance); + // Limits pk.pack("time_limit"); pk.pack(settings.time_limit); pk.pack("iteration_limit"); pk.pack(static_cast(settings.iteration_limit)); - pk.pack("abs_gap_tol"); - pk.pack(settings.tolerances.absolute_gap_tolerance); - pk.pack("rel_gap_tol"); - pk.pack(settings.tolerances.relative_gap_tolerance); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("detect_infeasibility"); + pk.pack(settings.detect_infeasibility); + pk.pack("strict_infeasibility"); + pk.pack(settings.strict_infeasibility); + pk.pack("pdlp_solver_mode"); + pk.pack(static_cast(settings.pdlp_solver_mode)); + pk.pack("method"); + 
pk.pack(static_cast(settings.method)); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("dual_postsolve"); + pk.pack(settings.dual_postsolve); + pk.pack("crossover"); + pk.pack(settings.crossover); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + // Advanced options + pk.pack("per_constraint_residual"); + pk.pack(settings.per_constraint_residual); + pk.pack("cudss_deterministic"); + pk.pack(settings.cudss_deterministic); + pk.pack("folding"); + pk.pack(settings.folding); + pk.pack("augmented"); + pk.pack(settings.augmented); + pk.pack("dualize"); + pk.pack(settings.dualize); + pk.pack("ordering"); + pk.pack(settings.ordering); + pk.pack("barrier_dual_initial_point"); + pk.pack(settings.barrier_dual_initial_point); + pk.pack("eliminate_dense_columns"); + pk.pack(settings.eliminate_dense_columns); + pk.pack("save_best_primal_so_far"); + pk.pack(settings.save_best_primal_so_far); + pk.pack("first_primal_feasible"); + pk.pack(settings.first_primal_feasible); return std::vector(buffer.data(), buffer.data() + buffer.size()); } @@ -93,11 +147,37 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack_uint32(protocol_version()); pack_problem(pk, view); - pk.pack_map(2); + // Pack all MIP settings (field names match cuOpt API) + pk.pack_map(13); + // Limits pk.pack("time_limit"); pk.pack(settings.time_limit); - pk.pack("mip_gap"); + // Tolerances + pk.pack("relative_mip_gap"); pk.pack(settings.tolerances.relative_mip_gap); + pk.pack("absolute_mip_gap"); + pk.pack(settings.tolerances.absolute_mip_gap); + pk.pack("integrality_tolerance"); + pk.pack(settings.tolerances.integrality_tolerance); + pk.pack("absolute_tolerance"); + pk.pack(settings.tolerances.absolute_tolerance); + pk.pack("relative_tolerance"); + pk.pack(settings.tolerances.relative_tolerance); + pk.pack("presolve_absolute_tolerance"); + pk.pack(settings.tolerances.presolve_absolute_tolerance); + // Solver configuration + pk.pack("log_to_console"); + 
pk.pack(settings.log_to_console); + pk.pack("heuristics_only"); + pk.pack(settings.heuristics_only); + pk.pack("num_cpu_threads"); + pk.pack(settings.num_cpu_threads); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("mip_scaling"); + pk.pack(settings.mip_scaling); return std::vector(buffer.data(), buffer.data() + buffer.size()); } @@ -229,17 +309,102 @@ class msgpack_serializer_t : public remote_serializer_t { msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); auto settings_map = oh_settings.get().as>(); + // Deserialize all LP settings (field names match cuOpt API) + // Termination tolerances + if (settings_map.count("absolute_gap_tolerance")) { + settings.tolerances.absolute_gap_tolerance = + settings_map["absolute_gap_tolerance"].as(); + } + if (settings_map.count("relative_gap_tolerance")) { + settings.tolerances.relative_gap_tolerance = + settings_map["relative_gap_tolerance"].as(); + } + if (settings_map.count("primal_infeasible_tolerance")) { + settings.tolerances.primal_infeasible_tolerance = + settings_map["primal_infeasible_tolerance"].as(); + } + if (settings_map.count("dual_infeasible_tolerance")) { + settings.tolerances.dual_infeasible_tolerance = + settings_map["dual_infeasible_tolerance"].as(); + } + if (settings_map.count("absolute_dual_tolerance")) { + settings.tolerances.absolute_dual_tolerance = + settings_map["absolute_dual_tolerance"].as(); + } + if (settings_map.count("relative_dual_tolerance")) { + settings.tolerances.relative_dual_tolerance = + settings_map["relative_dual_tolerance"].as(); + } + if (settings_map.count("absolute_primal_tolerance")) { + settings.tolerances.absolute_primal_tolerance = + settings_map["absolute_primal_tolerance"].as(); + } + if (settings_map.count("relative_primal_tolerance")) { + settings.tolerances.relative_primal_tolerance = + settings_map["relative_primal_tolerance"].as(); + } + // Limits if 
(settings_map.count("time_limit")) { settings.time_limit = settings_map["time_limit"].as(); } if (settings_map.count("iteration_limit")) { settings.iteration_limit = settings_map["iteration_limit"].as(); } - if (settings_map.count("abs_gap_tol")) { - settings.tolerances.absolute_gap_tolerance = settings_map["abs_gap_tol"].as(); + // Solver configuration + if (settings_map.count("log_to_console")) { + settings.log_to_console = settings_map["log_to_console"].as(); + } + if (settings_map.count("detect_infeasibility")) { + settings.detect_infeasibility = settings_map["detect_infeasibility"].as(); + } + if (settings_map.count("strict_infeasibility")) { + settings.strict_infeasibility = settings_map["strict_infeasibility"].as(); + } + if (settings_map.count("pdlp_solver_mode")) { + settings.pdlp_solver_mode = + static_cast(settings_map["pdlp_solver_mode"].as()); + } + if (settings_map.count("method")) { + settings.method = static_cast(settings_map["method"].as()); + } + if (settings_map.count("presolve")) { + settings.presolve = settings_map["presolve"].as(); + } + if (settings_map.count("dual_postsolve")) { + settings.dual_postsolve = settings_map["dual_postsolve"].as(); + } + if (settings_map.count("crossover")) { + settings.crossover = settings_map["crossover"].as(); + } + if (settings_map.count("num_gpus")) { + settings.num_gpus = settings_map["num_gpus"].as(); + } + // Advanced options + if (settings_map.count("per_constraint_residual")) { + settings.per_constraint_residual = settings_map["per_constraint_residual"].as(); + } + if (settings_map.count("cudss_deterministic")) { + settings.cudss_deterministic = settings_map["cudss_deterministic"].as(); + } + if (settings_map.count("folding")) { settings.folding = settings_map["folding"].as(); } + if (settings_map.count("augmented")) { + settings.augmented = settings_map["augmented"].as(); } - if (settings_map.count("rel_gap_tol")) { - settings.tolerances.relative_gap_tolerance = settings_map["rel_gap_tol"].as(); + if 
(settings_map.count("dualize")) { settings.dualize = settings_map["dualize"].as(); } + if (settings_map.count("ordering")) { + settings.ordering = settings_map["ordering"].as(); + } + if (settings_map.count("barrier_dual_initial_point")) { + settings.barrier_dual_initial_point = settings_map["barrier_dual_initial_point"].as(); + } + if (settings_map.count("eliminate_dense_columns")) { + settings.eliminate_dense_columns = settings_map["eliminate_dense_columns"].as(); + } + if (settings_map.count("save_best_primal_so_far")) { + settings.save_best_primal_so_far = settings_map["save_best_primal_so_far"].as(); + } + if (settings_map.count("first_primal_feasible")) { + settings.first_primal_feasible = settings_map["first_primal_feasible"].as(); } return true; @@ -270,11 +435,50 @@ class msgpack_serializer_t : public remote_serializer_t { msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); auto settings_map = oh_settings.get().as>(); + // Deserialize all MIP settings (field names match cuOpt API) + // Limits if (settings_map.count("time_limit")) { settings.time_limit = settings_map["time_limit"].as(); } - if (settings_map.count("mip_gap")) { - settings.tolerances.relative_mip_gap = settings_map["mip_gap"].as(); + // Tolerances + if (settings_map.count("relative_mip_gap")) { + settings.tolerances.relative_mip_gap = settings_map["relative_mip_gap"].as(); + } + if (settings_map.count("absolute_mip_gap")) { + settings.tolerances.absolute_mip_gap = settings_map["absolute_mip_gap"].as(); + } + if (settings_map.count("integrality_tolerance")) { + settings.tolerances.integrality_tolerance = + settings_map["integrality_tolerance"].as(); + } + if (settings_map.count("absolute_tolerance")) { + settings.tolerances.absolute_tolerance = settings_map["absolute_tolerance"].as(); + } + if (settings_map.count("relative_tolerance")) { + settings.tolerances.relative_tolerance = settings_map["relative_tolerance"].as(); + } + if 
(settings_map.count("presolve_absolute_tolerance")) { + settings.tolerances.presolve_absolute_tolerance = + settings_map["presolve_absolute_tolerance"].as(); + } + // Solver configuration + if (settings_map.count("log_to_console")) { + settings.log_to_console = settings_map["log_to_console"].as(); + } + if (settings_map.count("heuristics_only")) { + settings.heuristics_only = settings_map["heuristics_only"].as(); + } + if (settings_map.count("num_cpu_threads")) { + settings.num_cpu_threads = settings_map["num_cpu_threads"].as(); + } + if (settings_map.count("num_gpus")) { + settings.num_gpus = settings_map["num_gpus"].as(); + } + if (settings_map.count("presolve")) { + settings.presolve = settings_map["presolve"].as(); + } + if (settings_map.count("mip_scaling")) { + settings.mip_scaling = settings_map["mip_scaling"].as(); } return true; @@ -369,16 +573,70 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack_uint32(protocol_version()); pack_problem(pk, view); - // Pack LP settings - pk.pack_map(4); + // Pack all LP settings (field names match cuOpt API) + pk.pack_map(28); + // Termination tolerances + pk.pack("absolute_gap_tolerance"); + pk.pack(settings.tolerances.absolute_gap_tolerance); + pk.pack("relative_gap_tolerance"); + pk.pack(settings.tolerances.relative_gap_tolerance); + pk.pack("primal_infeasible_tolerance"); + pk.pack(settings.tolerances.primal_infeasible_tolerance); + pk.pack("dual_infeasible_tolerance"); + pk.pack(settings.tolerances.dual_infeasible_tolerance); + pk.pack("absolute_dual_tolerance"); + pk.pack(settings.tolerances.absolute_dual_tolerance); + pk.pack("relative_dual_tolerance"); + pk.pack(settings.tolerances.relative_dual_tolerance); + pk.pack("absolute_primal_tolerance"); + pk.pack(settings.tolerances.absolute_primal_tolerance); + pk.pack("relative_primal_tolerance"); + pk.pack(settings.tolerances.relative_primal_tolerance); + // Limits pk.pack("time_limit"); pk.pack(settings.time_limit); pk.pack("iteration_limit"); 
pk.pack(static_cast(settings.iteration_limit)); - pk.pack("abs_gap_tol"); - pk.pack(settings.tolerances.absolute_gap_tolerance); - pk.pack("rel_gap_tol"); - pk.pack(settings.tolerances.relative_gap_tolerance); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("detect_infeasibility"); + pk.pack(settings.detect_infeasibility); + pk.pack("strict_infeasibility"); + pk.pack(settings.strict_infeasibility); + pk.pack("pdlp_solver_mode"); + pk.pack(static_cast(settings.pdlp_solver_mode)); + pk.pack("method"); + pk.pack(static_cast(settings.method)); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("dual_postsolve"); + pk.pack(settings.dual_postsolve); + pk.pack("crossover"); + pk.pack(settings.crossover); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + // Advanced options + pk.pack("per_constraint_residual"); + pk.pack(settings.per_constraint_residual); + pk.pack("cudss_deterministic"); + pk.pack(settings.cudss_deterministic); + pk.pack("folding"); + pk.pack(settings.folding); + pk.pack("augmented"); + pk.pack(settings.augmented); + pk.pack("dualize"); + pk.pack(settings.dualize); + pk.pack("ordering"); + pk.pack(settings.ordering); + pk.pack("barrier_dual_initial_point"); + pk.pack(settings.barrier_dual_initial_point); + pk.pack("eliminate_dense_columns"); + pk.pack(settings.eliminate_dense_columns); + pk.pack("save_best_primal_so_far"); + pk.pack(settings.save_best_primal_so_far); + pk.pack("first_primal_feasible"); + pk.pack(settings.first_primal_feasible); return std::vector(buffer.data(), buffer.data() + buffer.size()); } @@ -401,11 +659,37 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack_uint32(protocol_version()); pack_problem(pk, view); - pk.pack_map(2); + // Pack all MIP settings (field names match cuOpt API) + pk.pack_map(13); + // Limits pk.pack("time_limit"); pk.pack(settings.time_limit); - pk.pack("mip_gap"); + // Tolerances + pk.pack("relative_mip_gap"); 
pk.pack(settings.tolerances.relative_mip_gap); + pk.pack("absolute_mip_gap"); + pk.pack(settings.tolerances.absolute_mip_gap); + pk.pack("integrality_tolerance"); + pk.pack(settings.tolerances.integrality_tolerance); + pk.pack("absolute_tolerance"); + pk.pack(settings.tolerances.absolute_tolerance); + pk.pack("relative_tolerance"); + pk.pack(settings.tolerances.relative_tolerance); + pk.pack("presolve_absolute_tolerance"); + pk.pack(settings.tolerances.presolve_absolute_tolerance); + // Solver configuration + pk.pack("log_to_console"); + pk.pack(settings.log_to_console); + pk.pack("heuristics_only"); + pk.pack(settings.heuristics_only); + pk.pack("num_cpu_threads"); + pk.pack(settings.num_cpu_threads); + pk.pack("num_gpus"); + pk.pack(settings.num_gpus); + pk.pack("presolve"); + pk.pack(settings.presolve); + pk.pack("mip_scaling"); + pk.pack(settings.mip_scaling); return std::vector(buffer.data(), buffer.data() + buffer.size()); } @@ -840,6 +1124,7 @@ class msgpack_serializer_t : public remote_serializer_t { void pack_problem(msgpack::packer& pk, const mps_parser::data_model_view_t& view) { + // Field names match data_model_view_t (without trailing underscore) auto offsets_span = view.get_constraint_matrix_offsets(); auto values_span = view.get_constraint_matrix_values(); auto obj_span = view.get_objective_coefficients(); @@ -848,26 +1133,41 @@ class msgpack_serializer_t : public remote_serializer_t { i_t n_cols = static_cast(obj_span.size()); i_t nnz = static_cast(values_span.size()); - pk.pack_map(13); + // Count fields: base 20, plus optional fields + int num_fields = 20; + auto init_primal_span = view.get_initial_primal_solution(); + auto init_dual_span = view.get_initial_dual_solution(); + if (init_primal_span.size() > 0) num_fields++; + if (init_dual_span.size() > 0) num_fields++; + if (view.has_quadratic_objective()) num_fields += 3; + + pk.pack_map(num_fields); + + // Problem metadata + pk.pack("problem_name"); + pk.pack(view.get_problem_name()); + 
pk.pack("objective_name"); + pk.pack(view.get_objective_name()); + pk.pack("maximize"); + pk.pack(view.get_sense()); + pk.pack("objective_scaling_factor"); + pk.pack(static_cast(view.get_objective_scaling_factor())); + pk.pack("objective_offset"); + pk.pack(static_cast(view.get_objective_offset())); + // Dimensions pk.pack("n_rows"); pk.pack(static_cast(n_rows)); - pk.pack("n_cols"); pk.pack(static_cast(n_cols)); - pk.pack("nnz"); pk.pack(static_cast(nnz)); - pk.pack("maximize"); - pk.pack(view.get_sense()); - - // CSR matrix + // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) pk.pack("A_values"); - auto A_vals = view.get_constraint_matrix_values(); - pk.pack_array(A_vals.size()); - for (size_t i = 0; i < A_vals.size(); ++i) { - pk.pack(static_cast(A_vals.data()[i])); + pk.pack_array(values_span.size()); + for (size_t i = 0; i < values_span.size(); ++i) { + pk.pack(static_cast(values_span.data()[i])); } pk.pack("A_indices"); @@ -878,35 +1178,48 @@ class msgpack_serializer_t : public remote_serializer_t { } pk.pack("A_offsets"); - auto A_off = view.get_constraint_matrix_offsets(); - pk.pack_array(A_off.size()); - for (size_t i = 0; i < A_off.size(); ++i) { - pk.pack(static_cast(A_off.data()[i])); + pk.pack_array(offsets_span.size()); + for (size_t i = 0; i < offsets_span.size(); ++i) { + pk.pack(static_cast(offsets_span.data()[i])); } - pk.pack("obj_coeffs"); - auto obj = view.get_objective_coefficients(); - pk.pack_array(obj.size()); - for (size_t i = 0; i < obj.size(); ++i) { - pk.pack(static_cast(obj.data()[i])); + // Objective coefficients c (name matches data_model_view_t: c_) + pk.pack("c"); + pk.pack_array(obj_span.size()); + for (size_t i = 0; i < obj_span.size(); ++i) { + pk.pack(static_cast(obj_span.data()[i])); } - pk.pack("var_lb"); + // Variable bounds + pk.pack("variable_lower_bounds"); auto vlb = view.get_variable_lower_bounds(); pk.pack_array(vlb.size()); for (size_t i = 0; i < vlb.size(); ++i) { 
pk.pack(static_cast(vlb.data()[i])); } - pk.pack("var_ub"); + pk.pack("variable_upper_bounds"); auto vub = view.get_variable_upper_bounds(); pk.pack_array(vub.size()); for (size_t i = 0; i < vub.size(); ++i) { pk.pack(static_cast(vub.data()[i])); } - // Constraint bounds - derive from row_types if needed - pk.pack("con_lb"); + // Constraint bounds b (RHS) + pk.pack("b"); + auto b_span = view.get_constraint_bounds(); + pk.pack_array(b_span.size()); + for (size_t i = 0; i < b_span.size(); ++i) { + pk.pack(static_cast(b_span.data()[i])); + } + + // Row types + pk.pack("row_types"); + auto rt_span = view.get_row_types(); + pk.pack(std::string(rt_span.data(), rt_span.size())); + + // Constraint lower/upper bounds + pk.pack("constraint_lower_bounds"); auto clb = view.get_constraint_lower_bounds(); if (clb.size() > 0) { pk.pack_array(clb.size()); @@ -914,18 +1227,10 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack(static_cast(clb.data()[i])); } } else { - auto b_span = view.get_constraint_bounds(); - auto rt_span = view.get_row_types(); - pk.pack_array(n_rows); - for (i_t i = 0; i < n_rows; ++i) { - char rt = (rt_span.size() > 0) ? rt_span.data()[i] : 'E'; - f_t b = (b_span.size() > 0) ? b_span.data()[i] : 0; - f_t lb = (rt == 'G' || rt == 'E') ? b : -1e20; - pk.pack(static_cast(lb)); - } + pk.pack_array(0); } - pk.pack("con_ub"); + pk.pack("constraint_upper_bounds"); auto cub = view.get_constraint_upper_bounds(); if (cub.size() > 0) { pk.pack_array(cub.size()); @@ -933,24 +1238,53 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack(static_cast(cub.data()[i])); } } else { - auto b_span = view.get_constraint_bounds(); - auto rt_span = view.get_row_types(); - pk.pack_array(n_rows); - for (i_t i = 0; i < n_rows; ++i) { - char rt = (rt_span.size() > 0) ? rt_span.data()[i] : 'E'; - f_t b = (b_span.size() > 0) ? b_span.data()[i] : 0; - f_t ub = (rt == 'L' || rt == 'E') ? 
b : 1e20; - pk.pack(static_cast(ub)); - } + pk.pack_array(0); } - // Variable types - pk.pack("var_types"); + // Variable types (name matches data_model_view_t: variable_types_) + pk.pack("variable_types"); auto vt = view.get_variable_types(); - pk.pack_array(n_cols); - for (i_t i = 0; i < n_cols; ++i) { - char t = (vt.size() > 0) ? vt.data()[i] : 'C'; - pk.pack(std::string(1, t)); + pk.pack(std::string(vt.data(), vt.size())); + + // Initial solutions (if available) + if (init_primal_span.size() > 0) { + pk.pack("initial_primal_solution"); + pk.pack_array(init_primal_span.size()); + for (size_t i = 0; i < init_primal_span.size(); ++i) { + pk.pack(static_cast(init_primal_span.data()[i])); + } + } + + if (init_dual_span.size() > 0) { + pk.pack("initial_dual_solution"); + pk.pack_array(init_dual_span.size()); + for (size_t i = 0; i < init_dual_span.size(); ++i) { + pk.pack(static_cast(init_dual_span.data()[i])); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (view.has_quadratic_objective()) { + pk.pack("Q_values"); + auto q_vals = view.get_quadratic_objective_values(); + pk.pack_array(q_vals.size()); + for (size_t i = 0; i < q_vals.size(); ++i) { + pk.pack(static_cast(q_vals.data()[i])); + } + + pk.pack("Q_indices"); + auto q_idx = view.get_quadratic_objective_indices(); + pk.pack_array(q_idx.size()); + for (size_t i = 0; i < q_idx.size(); ++i) { + pk.pack(static_cast(q_idx.data()[i])); + } + + pk.pack("Q_offsets"); + auto q_off = view.get_quadratic_objective_offsets(); + pk.pack_array(q_off.size()); + for (size_t i = 0; i < q_off.size(); ++i) { + pk.pack(static_cast(q_off.data()[i])); + } } } @@ -958,15 +1292,29 @@ class msgpack_serializer_t : public remote_serializer_t { size_t& offset, mps_parser::mps_data_model_t& mps_data) { + // Field names match data_model_view_t (without trailing underscore) msgpack::object_handle oh = msgpack::unpack(reinterpret_cast(data.data()), data.size(), offset); auto problem_map = oh.get().as>(); - i_t n_cols = 
problem_map["n_cols"].as(); - - mps_data.set_maximize(problem_map["maximize"].as()); + // Problem metadata + if (problem_map.count("problem_name")) { + mps_data.set_problem_name(problem_map["problem_name"].as()); + } + if (problem_map.count("objective_name")) { + mps_data.set_objective_name(problem_map["objective_name"].as()); + } + if (problem_map.count("maximize")) { + mps_data.set_maximize(problem_map["maximize"].as()); + } + if (problem_map.count("objective_scaling_factor")) { + mps_data.set_objective_scaling_factor(problem_map["objective_scaling_factor"].as()); + } + if (problem_map.count("objective_offset")) { + mps_data.set_objective_offset(problem_map["objective_offset"].as()); + } - // CSR matrix + // Constraint matrix A in CSR format std::vector A_values; problem_map["A_values"].convert(A_values); std::vector A_indices; @@ -981,39 +1329,89 @@ class msgpack_serializer_t : public remote_serializer_t { A_offsets.data(), static_cast(A_offsets.size())); - std::vector obj_coeffs; - problem_map["obj_coeffs"].convert(obj_coeffs); - mps_data.set_objective_coefficients(obj_coeffs.data(), static_cast(obj_coeffs.size())); + // Objective coefficients c + std::vector c; + problem_map["c"].convert(c); + mps_data.set_objective_coefficients(c.data(), static_cast(c.size())); + // Variable bounds std::vector var_lb, var_ub; - problem_map["var_lb"].convert(var_lb); - problem_map["var_ub"].convert(var_ub); + problem_map["variable_lower_bounds"].convert(var_lb); + problem_map["variable_upper_bounds"].convert(var_ub); mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); - std::vector con_lb, con_ub; - problem_map["con_lb"].convert(con_lb); - problem_map["con_ub"].convert(con_ub); - mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); - mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); - - // Variable types - 
std::vector var_types; - if (problem_map["var_types"].type == msgpack::type::ARRAY) { - auto arr = problem_map["var_types"].via.array; - for (size_t i = 0; i < arr.size; ++i) { - if (arr.ptr[i].type == msgpack::type::STR) { - std::string s; - arr.ptr[i].convert(s); - var_types.push_back(s.empty() ? 'C' : s[0]); - } else { - var_types.push_back('C'); - } + // Constraint bounds (prefer lower/upper bounds if available) + if (problem_map.count("constraint_lower_bounds")) { + std::vector con_lb; + problem_map["constraint_lower_bounds"].convert(con_lb); + if (con_lb.size() > 0) { + std::vector con_ub; + problem_map["constraint_upper_bounds"].convert(con_ub); + mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + } + } + + // Constraint bounds b (RHS) + row_types format + if (problem_map.count("b")) { + std::vector b; + problem_map["b"].convert(b); + if (b.size() > 0) { mps_data.set_constraint_bounds(b.data(), static_cast(b.size())); } + } + + if (problem_map.count("row_types")) { + std::string row_types_str = problem_map["row_types"].as(); + if (!row_types_str.empty()) { + mps_data.set_row_types(row_types_str.data(), static_cast(row_types_str.size())); + } + } + + // Variable types (stored as string, matching data_model_view_t) + if (problem_map.count("variable_types")) { + std::string var_types_str = problem_map["variable_types"].as(); + if (!var_types_str.empty()) { + std::vector var_types(var_types_str.begin(), var_types_str.end()); + mps_data.set_variable_types(var_types); + } + } + + // Initial solutions (if provided) + if (problem_map.count("initial_primal_solution")) { + std::vector init_primal; + problem_map["initial_primal_solution"].convert(init_primal); + if (init_primal.size() > 0) { + mps_data.set_initial_primal_solution(init_primal.data(), + static_cast(init_primal.size())); + } + } + + if (problem_map.count("initial_dual_solution")) { + 
std::vector init_dual; + problem_map["initial_dual_solution"].convert(init_dual); + if (init_dual.size() > 0) { + mps_data.set_initial_dual_solution(init_dual.data(), static_cast(init_dual.size())); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (problem_map.count("Q_values")) { + std::vector Q_values; + std::vector Q_indices; + std::vector Q_offsets; + problem_map["Q_values"].convert(Q_values); + problem_map["Q_indices"].convert(Q_indices); + problem_map["Q_offsets"].convert(Q_offsets); + + if (Q_values.size() > 0) { + mps_data.set_quadratic_objective_matrix(Q_values.data(), + static_cast(Q_values.size()), + Q_indices.data(), + static_cast(Q_indices.size()), + Q_offsets.data(), + static_cast(Q_offsets.size())); } - } else { - var_types.resize(n_cols, 'C'); } - mps_data.set_variable_types(var_types); } }; From 0af43ea8676aba0bcd32fd12cb7ddb768de4e71e Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 11:16:35 -0500 Subject: [PATCH 20/37] Align protobuf/msgpack names exactly with cuOpt API - Rename A_values -> A (matches data_model_view_t::A_) - Remove unused problem_category field and enum - Rename PDLPSolverMode enum values to match pdlp_solver_mode_t: PDLP_MODE_STABLE1 -> Stable1, etc. - Rename LPMethod enum values to match method_t: METHOD_CONCURRENT -> Concurrent, METHOD_BARRIER -> Barrier, etc. 
- Update protobuf_serializer.cu for new enum names - Update msgpack_serializer.cpp for A field rename --- .../utilities/cuopt_remote.proto | 31 ++++++------- .../utilities/protobuf_serializer.cu | 44 +++++++++---------- .../serializers/msgpack_serializer.cpp | 4 +- 3 files changed, 37 insertions(+), 42 deletions(-) diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto index 5b64e5055..cd48f9587 100644 --- a/cpp/src/linear_programming/utilities/cuopt_remote.proto +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -36,14 +36,14 @@ message OptimizationProblem { bool maximize = 3; double objective_scaling_factor = 4; double objective_offset = 5; - ProblemCategory problem_category = 6; + // Field 6 reserved (was problem_category, now inferred from variable_types) // Variable and row names (optional) repeated string variable_names = 7; repeated string row_names = 8; // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) - repeated double A_values = 10; + repeated double A = 10; repeated int32 A_indices = 11; repeated int32 A_offsets = 12; @@ -71,27 +71,22 @@ message OptimizationProblem { repeated int32 Q_offsets = 52; } -enum ProblemCategory { - LP_PROBLEM = 0; - MIP_PROBLEM = 1; - IP_PROBLEM = 2; -} - // PDLP solver mode enum (matches cuOpt pdlp_solver_mode_t) +// Matches cuOpt pdlp_solver_mode_t enum values enum PDLPSolverMode { - PDLP_MODE_STABLE1 = 0; - PDLP_MODE_STABLE2 = 1; - PDLP_MODE_METHODICAL1 = 2; - PDLP_MODE_FAST1 = 3; - PDLP_MODE_STABLE3 = 4; + Stable1 = 0; + Stable2 = 1; + Methodical1 = 2; + Fast1 = 3; + Stable3 = 4; } -// LP solver method enum (matches cuOpt method_t) +// Matches cuOpt method_t enum values enum LPMethod { - METHOD_CONCURRENT = 0; - METHOD_PDLP = 1; - METHOD_DUAL_SIMPLEX = 2; - METHOD_BARRIER = 3; + Concurrent = 0; + PDLP = 1; + DualSimplex = 2; + Barrier = 3; } // PDLP solver settings (field names match cuOpt 
Python/C++ API) diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 70f591f8b..28c60664d 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -826,7 +826,7 @@ class protobuf_serializer_t : public remote_serializer_t { // Constraint matrix A in CSR format (field names match data_model_view_t) for (i_t i = 0; i < nnz; ++i) { - pb_problem->add_a_values(static_cast(values_ptr[i])); + pb_problem->add_a(static_cast(values_ptr[i])); } for (i_t i = 0; i < nnz; ++i) { pb_problem->add_a_indices(static_cast(indices_ptr[i])); @@ -985,12 +985,12 @@ class protobuf_serializer_t : public remote_serializer_t { cuopt::remote::PDLPSolverMode to_proto_pdlp_mode(pdlp_solver_mode_t mode) { switch (mode) { - case pdlp_solver_mode_t::Stable1: return cuopt::remote::PDLP_MODE_STABLE1; - case pdlp_solver_mode_t::Stable2: return cuopt::remote::PDLP_MODE_STABLE2; - case pdlp_solver_mode_t::Methodical1: return cuopt::remote::PDLP_MODE_METHODICAL1; - case pdlp_solver_mode_t::Fast1: return cuopt::remote::PDLP_MODE_FAST1; - case pdlp_solver_mode_t::Stable3: return cuopt::remote::PDLP_MODE_STABLE3; - default: return cuopt::remote::PDLP_MODE_STABLE3; + case pdlp_solver_mode_t::Stable1: return cuopt::remote::Stable1; + case pdlp_solver_mode_t::Stable2: return cuopt::remote::Stable2; + case pdlp_solver_mode_t::Methodical1: return cuopt::remote::Methodical1; + case pdlp_solver_mode_t::Fast1: return cuopt::remote::Fast1; + case pdlp_solver_mode_t::Stable3: return cuopt::remote::Stable3; + default: return cuopt::remote::Stable3; } } @@ -998,11 +998,11 @@ class protobuf_serializer_t : public remote_serializer_t { cuopt::remote::LPMethod to_proto_method(method_t method) { switch (method) { - case method_t::Concurrent: return cuopt::remote::METHOD_CONCURRENT; - case method_t::PDLP: return cuopt::remote::METHOD_PDLP; - case 
method_t::DualSimplex: return cuopt::remote::METHOD_DUAL_SIMPLEX; - case method_t::Barrier: return cuopt::remote::METHOD_BARRIER; - default: return cuopt::remote::METHOD_CONCURRENT; + case method_t::Concurrent: return cuopt::remote::Concurrent; + case method_t::PDLP: return cuopt::remote::PDLP; + case method_t::DualSimplex: return cuopt::remote::DualSimplex; + case method_t::Barrier: return cuopt::remote::Barrier; + default: return cuopt::remote::Concurrent; } } @@ -1061,7 +1061,7 @@ class protobuf_serializer_t : public remote_serializer_t { mps_data.set_objective_offset(pb_problem.objective_offset()); // Constraint matrix A in CSR format (field names match data_model_view_t) - std::vector values(pb_problem.a_values().begin(), pb_problem.a_values().end()); + std::vector values(pb_problem.a().begin(), pb_problem.a().end()); std::vector indices(pb_problem.a_indices().begin(), pb_problem.a_indices().end()); std::vector offsets(pb_problem.a_offsets().begin(), pb_problem.a_offsets().end()); @@ -1157,11 +1157,11 @@ class protobuf_serializer_t : public remote_serializer_t { pdlp_solver_mode_t from_proto_pdlp_mode(cuopt::remote::PDLPSolverMode mode) { switch (mode) { - case cuopt::remote::PDLP_MODE_STABLE1: return pdlp_solver_mode_t::Stable1; - case cuopt::remote::PDLP_MODE_STABLE2: return pdlp_solver_mode_t::Stable2; - case cuopt::remote::PDLP_MODE_METHODICAL1: return pdlp_solver_mode_t::Methodical1; - case cuopt::remote::PDLP_MODE_FAST1: return pdlp_solver_mode_t::Fast1; - case cuopt::remote::PDLP_MODE_STABLE3: return pdlp_solver_mode_t::Stable3; + case cuopt::remote::Stable1: return pdlp_solver_mode_t::Stable1; + case cuopt::remote::Stable2: return pdlp_solver_mode_t::Stable2; + case cuopt::remote::Methodical1: return pdlp_solver_mode_t::Methodical1; + case cuopt::remote::Fast1: return pdlp_solver_mode_t::Fast1; + case cuopt::remote::Stable3: return pdlp_solver_mode_t::Stable3; default: return pdlp_solver_mode_t::Stable3; } } @@ -1170,10 +1170,10 @@ class 
protobuf_serializer_t : public remote_serializer_t { method_t from_proto_method(cuopt::remote::LPMethod method) { switch (method) { - case cuopt::remote::METHOD_CONCURRENT: return method_t::Concurrent; - case cuopt::remote::METHOD_PDLP: return method_t::PDLP; - case cuopt::remote::METHOD_DUAL_SIMPLEX: return method_t::DualSimplex; - case cuopt::remote::METHOD_BARRIER: return method_t::Barrier; + case cuopt::remote::Concurrent: return method_t::Concurrent; + case cuopt::remote::PDLP: return method_t::PDLP; + case cuopt::remote::DualSimplex: return method_t::DualSimplex; + case cuopt::remote::Barrier: return method_t::Barrier; default: return method_t::Concurrent; } } diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index 8821c8df9..19a3a18e3 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -1164,7 +1164,7 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack(static_cast(nnz)); // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) - pk.pack("A_values"); + pk.pack("A"); pk.pack_array(values_span.size()); for (size_t i = 0; i < values_span.size(); ++i) { pk.pack(static_cast(values_span.data()[i])); @@ -1316,7 +1316,7 @@ class msgpack_serializer_t : public remote_serializer_t { // Constraint matrix A in CSR format std::vector A_values; - problem_map["A_values"].convert(A_values); + problem_map["A"].convert(A_values); std::vector A_indices; problem_map["A_indices"].convert(A_indices); std::vector A_offsets; From 2982a033a2031f1f0cc2df320fd9485b65b69222 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 11:46:21 -0500 Subject: [PATCH 21/37] Align msgpack field names with protobuf - Remove redundant n_rows, n_cols, nnz fields (sizes inferred from arrays) - Add variable_names 
and row_names fields (matching protobuf) - Msgpack and protobuf now use identical field names for OptimizationProblem --- .../serializers/msgpack_serializer.cpp | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index 19a3a18e3..f01b78547 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -1129,14 +1129,14 @@ class msgpack_serializer_t : public remote_serializer_t { auto values_span = view.get_constraint_matrix_values(); auto obj_span = view.get_objective_coefficients(); - i_t n_rows = static_cast(offsets_span.size()) - 1; - i_t n_cols = static_cast(obj_span.size()); - i_t nnz = static_cast(values_span.size()); - - // Count fields: base 20, plus optional fields - int num_fields = 20; - auto init_primal_span = view.get_initial_primal_solution(); - auto init_dual_span = view.get_initial_dual_solution(); + // Count fields: base 17, plus optional fields + int num_fields = 17; + auto init_primal_span = view.get_initial_primal_solution(); + auto init_dual_span = view.get_initial_dual_solution(); + const auto& var_names = view.get_variable_names(); + const auto& row_names_vec = view.get_row_names(); + if (!var_names.empty()) num_fields++; + if (!row_names_vec.empty()) num_fields++; if (init_primal_span.size() > 0) num_fields++; if (init_dual_span.size() > 0) num_fields++; if (view.has_quadratic_objective()) num_fields += 3; @@ -1155,13 +1155,21 @@ class msgpack_serializer_t : public remote_serializer_t { pk.pack("objective_offset"); pk.pack(static_cast(view.get_objective_offset())); - // Dimensions - pk.pack("n_rows"); - pk.pack(static_cast(n_rows)); - pk.pack("n_cols"); - pk.pack(static_cast(n_cols)); - pk.pack("nnz"); - pk.pack(static_cast(nnz)); + // Variable and row names 
(optional, matches protobuf) + if (!var_names.empty()) { + pk.pack("variable_names"); + pk.pack_array(var_names.size()); + for (const auto& name : var_names) { + pk.pack(name); + } + } + if (!row_names_vec.empty()) { + pk.pack("row_names"); + pk.pack_array(row_names_vec.size()); + for (const auto& name : row_names_vec) { + pk.pack(name); + } + } // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_) pk.pack("A"); @@ -1334,6 +1342,18 @@ class msgpack_serializer_t : public remote_serializer_t { problem_map["c"].convert(c); mps_data.set_objective_coefficients(c.data(), static_cast(c.size())); + // Variable and row names (optional) + if (problem_map.count("variable_names")) { + std::vector var_names; + problem_map["variable_names"].convert(var_names); + if (!var_names.empty()) { mps_data.set_variable_names(var_names); } + } + if (problem_map.count("row_names")) { + std::vector row_names; + problem_map["row_names"].convert(row_names); + if (!row_names.empty()) { mps_data.set_row_names(row_names); } + } + // Variable bounds std::vector var_lb, var_ub; problem_map["variable_lower_bounds"].convert(var_lb); From e4efe22c14973cfe85270427180579e3542db730 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 12:02:39 -0500 Subject: [PATCH 22/37] Dynamic shared memory and uint64 wire protocol for large problems Shared Memory Changes: - Replace fixed 64MB/128MB embedded data arrays with per-job shared memory - JobQueueEntry and ResultQueueEntry now store shm_data_name instead of data[] - Create /cuopt_job_ and /cuopt_result_ segments on demand - Cleanup segments after job completion or on error - Change data_size from uint32_t to uint64_t for >4GB support Wire Protocol Changes: - Change message size from uint32_t (4GB max) to uint64_t - Update both server (cuopt_remote_server.cpp) and client (remote_solve.cu) - Increase sanity check limit from 100MB to 16GB Benefits: - No fixed size limits - problems limited only by system 
RAM - No wasted memory for small problems - Queue entries are now ~256 bytes instead of 64MB+ - Supports very large LP/MIP problems --- cpp/cuopt_remote_server.cpp | 281 +++++++++++++++--- .../utilities/remote_solve.cu | 20 +- 2 files changed, 243 insertions(+), 58 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index d75a7a3e3..ac2af7cf4 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -58,27 +58,27 @@ using namespace cuopt::linear_programming; // Shared Memory Structures (must match between main process and workers) // ============================================================================ -constexpr size_t MAX_JOBS = 100; -constexpr size_t MAX_RESULTS = 100; -constexpr size_t MAX_JOB_DATA_SIZE = 64 * 1024 * 1024; // 64MB max problem size -constexpr size_t MAX_RESULT_SIZE = 128 * 1024 * 1024; // 128MB max result size +constexpr size_t MAX_JOBS = 100; +constexpr size_t MAX_RESULTS = 100; +// Job queue entry - small fixed size, data stored in separate per-job shared memory struct JobQueueEntry { char job_id[64]; - uint32_t problem_type; // 0 = LP, 1 = MIP - uint32_t data_size; - uint8_t data[MAX_JOB_DATA_SIZE]; + uint32_t problem_type; // 0 = LP, 1 = MIP + uint64_t data_size; // Size of problem data (uint64 for large problems) + char shm_data_name[128]; // Name of per-job shared memory segment std::atomic ready; // Job is ready to be processed std::atomic claimed; // Worker has claimed this job std::atomic worker_pid; // PID of worker that claimed this job (0 if none) std::atomic cancelled; // Job has been cancelled (worker should skip) }; +// Result queue entry - small fixed size, data stored in separate per-result shared memory struct ResultQueueEntry { char job_id[64]; - uint32_t status; // 0 = success, 1 = error - uint32_t data_size; - uint8_t data[MAX_RESULT_SIZE]; + uint32_t status; // 0 = success, 1 = error + uint64_t data_size; // Size of result data (uint64 for large results) + char 
shm_data_name[128]; // Name of per-result shared memory segment char error_message[1024]; std::atomic ready; // Result is ready std::atomic retrieved; // Result has been retrieved @@ -103,7 +103,7 @@ enum class MessageType : uint8_t { static bool send_typed_message(int sockfd, MessageType type, const void* data, size_t size) { uint8_t msg_type = static_cast(type); - uint32_t payload_size = static_cast(size); + uint64_t payload_size = static_cast(size); if (::write(sockfd, &msg_type, 1) != 1) return false; if (::write(sockfd, &payload_size, 4) != 4) return false; @@ -324,10 +324,11 @@ static bool send_solution_message(int sockfd, const std::vector& data) static bool receive_request(int sockfd, std::vector& data) { - uint32_t size; + uint64_t size; if (!read_all(sockfd, &size, sizeof(size))) return false; - if (size > 100 * 1024 * 1024) { + // Sanity check - reject requests larger than 16GB + if (size > 16ULL * 1024 * 1024 * 1024) { std::cerr << "[Server] Request too large: " << size << " bytes\n"; return false; } @@ -337,6 +338,17 @@ static bool receive_request(int sockfd, std::vector& data) return true; } +// ============================================================================ +// Per-job Shared Memory Helpers (forward declarations) +// ============================================================================ + +static std::string create_job_shm(const std::string& job_id, + const std::vector& data, + const char* prefix); +static bool read_job_shm(const char* shm_name, size_t data_size, std::vector& data); +static std::string write_result_shm(const std::string& job_id, const std::vector& data); +static void cleanup_job_shm(const char* shm_name); + // ============================================================================ // Shared Memory Management // ============================================================================ @@ -456,8 +468,8 @@ void worker_process(int worker_id) // Increment active worker count shm_ctrl->active_workers++; - // 
Create RAFT handle for GPU operations - raft::handle_t handle; + // NOTE: We create raft::handle_t AFTER stdout redirect (per-job) so that + // CUDA logging uses the redirected output streams. // Get serializer auto serializer = get_serializer(); @@ -494,12 +506,16 @@ void worker_process(int worker_id) << "\n"; std::cout.flush(); + // Cleanup job input shm + cleanup_job_shm(job.shm_data_name); + // Store cancelled result in result queue for (size_t i = 0; i < MAX_RESULTS; ++i) { if (!result_queue[i].ready) { strncpy(result_queue[i].job_id, job_id.c_str(), sizeof(result_queue[i].job_id) - 1); - result_queue[i].status = 2; // Cancelled status - result_queue[i].data_size = 0; + result_queue[i].status = 2; // Cancelled status + result_queue[i].data_size = 0; + result_queue[i].shm_data_name[0] = '\0'; strncpy(result_queue[i].error_message, "Job was cancelled", sizeof(result_queue[i].error_message) - 1); @@ -521,16 +537,73 @@ void worker_process(int worker_id) << " (type: " << (is_mip ? "MIP" : "LP") << ")\n"; std::cout.flush(); - // Redirect stdout to per-job log file for client log retrieval + // Redirect stdout AND stderr to per-job log file for client log retrieval + // (Solver may use either stream for output) std::string log_file = get_log_file_path(job_id); int saved_stdout = dup(STDOUT_FILENO); + int saved_stderr = dup(STDERR_FILENO); int log_fd = open(log_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); if (log_fd >= 0) { + // Flush C++ streams before changing fd + std::cout.flush(); + std::cerr.flush(); + fflush(stdout); + fflush(stderr); + dup2(log_fd, STDOUT_FILENO); + dup2(log_fd, STDERR_FILENO); close(log_fd); + + // Use unbuffered output for real-time log streaming + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + + // Test that redirection works + printf("[Worker %d] Log file initialized: %s\n", worker_id, log_file.c_str()); + fflush(stdout); } - std::vector request_data(job.data, job.data + job.data_size); + // Create RAFT 
handle AFTER stdout redirect so CUDA sees the new streams + const char* msg0 = "[Worker] Creating raft::handle_t...\n"; + write(STDOUT_FILENO, msg0, 36); + fsync(STDOUT_FILENO); + + raft::handle_t handle; + + const char* msg01 = "[Worker] Handle created, starting solve...\n"; + write(STDOUT_FILENO, msg01, 44); + fsync(STDOUT_FILENO); + + // Read problem data from per-job shared memory + std::vector request_data; + if (!read_job_shm(job.shm_data_name, job.data_size, request_data)) { + std::cerr << "[Worker " << worker_id << "] Failed to read job data from shm\n"; + // Store error result + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + strncpy(result_queue[i].job_id, job_id.c_str(), sizeof(result_queue[i].job_id) - 1); + result_queue[i].status = 1; // Error status + result_queue[i].data_size = 0; + result_queue[i].shm_data_name[0] = '\0'; + strncpy(result_queue[i].error_message, + "Failed to read job data from shared memory", + sizeof(result_queue[i].error_message) - 1); + result_queue[i].retrieved = false; + result_queue[i].ready = true; + break; + } + } + // Cleanup job shm and slot + cleanup_job_shm(job.shm_data_name); + job.worker_pid = 0; + job.ready = false; + job.claimed = false; + continue; + } + + // Cleanup job input shm now that we've read it + cleanup_job_shm(job.shm_data_name); + std::vector result_data; std::string error_message; bool success = false; @@ -553,7 +626,13 @@ void worker_process(int worker_id) pdlp_solver_settings_t settings; if (serializer->deserialize_lp_request(request_data, mps_data, settings)) { - auto solution = solve_lp(&handle, mps_data, settings); + const char* msg1 = "[Worker] Calling solve_lp via write()...\n"; + write(STDOUT_FILENO, msg1, strlen(msg1)); + fsync(STDOUT_FILENO); + auto solution = solve_lp(&handle, mps_data, settings); + const char* msg2 = "[Worker] solve_lp done via write()\n"; + write(STDOUT_FILENO, msg2, strlen(msg2)); + fsync(STDOUT_FILENO); solution.to_host(handle.get_stream()); 
result_data = serializer->serialize_lp_solution(solution); success = true; @@ -565,25 +644,42 @@ void worker_process(int worker_id) error_message = std::string("Exception: ") + e.what(); } - // Restore stdout to console + // Restore stdout and stderr to console fflush(stdout); + fflush(stderr); dup2(saved_stdout, STDOUT_FILENO); + dup2(saved_stderr, STDERR_FILENO); close(saved_stdout); + close(saved_stderr); - // Store result in result queue + // Store result in result queue with per-result shared memory for (size_t i = 0; i < MAX_RESULTS; ++i) { if (!result_queue[i].ready) { - bool expected = false; - // Claim this result slot ResultQueueEntry& result = result_queue[i]; strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); result.status = success ? 0 : 1; - if (success) { - result.data_size = std::min(result_data.size(), MAX_RESULT_SIZE); - memcpy(result.data, result_data.data(), result.data_size); - } else { + if (success && !result_data.empty()) { + // Create per-result shared memory + std::string shm_name = write_result_shm(job_id, result_data); + if (shm_name.empty()) { + // Failed to create shm - report error + result.status = 1; + result.data_size = 0; + result.shm_data_name[0] = '\0'; + strncpy(result.error_message, + "Failed to create shared memory for result", + sizeof(result.error_message) - 1); + } else { + result.data_size = result_data.size(); + strncpy(result.shm_data_name, shm_name.c_str(), sizeof(result.shm_data_name) - 1); + } + } else if (!success) { strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); - result.data_size = 0; + result.data_size = 0; + result.shm_data_name[0] = '\0'; + } else { + result.data_size = 0; + result.shm_data_name[0] = '\0'; } result.retrieved = false; result.ready = true; // Mark as ready last @@ -663,12 +759,16 @@ void mark_worker_jobs_failed(pid_t dead_worker_pid) << " died while processing job: " << job_id << "\n"; } + // Cleanup job input shm (worker may not have done 
it) + cleanup_job_shm(job_queue[i].shm_data_name); + // Store result in result queue (cancelled or failed) for (size_t j = 0; j < MAX_RESULTS; ++j) { if (!result_queue[j].ready) { strncpy(result_queue[j].job_id, job_id.c_str(), sizeof(result_queue[j].job_id) - 1); - result_queue[j].status = was_cancelled ? 2 : 1; // 2=cancelled, 1=error - result_queue[j].data_size = 0; + result_queue[j].status = was_cancelled ? 2 : 1; // 2=cancelled, 1=error + result_queue[j].data_size = 0; + result_queue[j].shm_data_name[0] = '\0'; strncpy(result_queue[j].error_message, was_cancelled ? "Job was cancelled" : "Worker process died unexpectedly", sizeof(result_queue[j].error_message) - 1); @@ -788,10 +888,16 @@ void result_retrieval_thread() std::vector result_data; std::string error_message; - if (success) { - result_data.assign(result_queue[i].data, - result_queue[i].data + result_queue[i].data_size); - } else { + if (success && result_queue[i].data_size > 0) { + // Read result data from per-result shared memory + if (!read_job_shm( + result_queue[i].shm_data_name, result_queue[i].data_size, result_data)) { + error_message = "Failed to read result data from shared memory"; + success = false; + } + // Cleanup result shm after reading + cleanup_job_shm(result_queue[i].shm_data_name); + } else if (!success) { error_message = result_queue[i].error_message; } @@ -848,12 +954,86 @@ void result_retrieval_thread() // Async Request Handlers // ============================================================================ +// Create per-job shared memory segment and copy data into it +// Returns the shm name on success, empty string on failure +static std::string create_job_shm(const std::string& job_id, + const std::vector& data, + const char* prefix) +{ + std::string shm_name = std::string("/cuopt_") + prefix + "_" + job_id; + + int fd = shm_open(shm_name.c_str(), O_CREAT | O_RDWR, 0666); + if (fd < 0) { + std::cerr << "[Server] Failed to create shm " << shm_name << ": " << strerror(errno) << 
"\n"; + return ""; + } + + if (ftruncate(fd, data.size()) < 0) { + std::cerr << "[Server] Failed to size shm " << shm_name << ": " << strerror(errno) << "\n"; + close(fd); + shm_unlink(shm_name.c_str()); + return ""; + } + + void* ptr = mmap(nullptr, data.size(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + + if (ptr == MAP_FAILED) { + std::cerr << "[Server] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n"; + shm_unlink(shm_name.c_str()); + return ""; + } + + memcpy(ptr, data.data(), data.size()); + munmap(ptr, data.size()); + + return shm_name; +} + +// Read data from per-job shared memory segment +static bool read_job_shm(const char* shm_name, size_t data_size, std::vector& data) +{ + int fd = shm_open(shm_name, O_RDONLY, 0666); + if (fd < 0) { + std::cerr << "[Worker] Failed to open shm " << shm_name << ": " << strerror(errno) << "\n"; + return false; + } + + void* ptr = mmap(nullptr, data_size, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + + if (ptr == MAP_FAILED) { + std::cerr << "[Worker] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n"; + return false; + } + + data.resize(data_size); + memcpy(data.data(), ptr, data_size); + munmap(ptr, data_size); + + return true; +} + +// Write data to per-result shared memory segment +static std::string write_result_shm(const std::string& job_id, const std::vector& data) +{ + return create_job_shm(job_id, data, "result"); +} + +// Cleanup per-job shared memory segment +static void cleanup_job_shm(const char* shm_name) +{ + if (shm_name[0] != '\0') { shm_unlink(shm_name); } +} + // Submit a job asynchronously (returns job_id) std::pair submit_job_async(const std::vector& request_data, bool is_mip) { std::string job_id = generate_job_id(); - if (request_data.size() > MAX_JOB_DATA_SIZE) { return {false, "Request data too large"}; } + // Create per-job shared memory for problem data + std::string shm_name = create_job_shm(job_id, request_data, "job"); + if (shm_name.empty()) { 
return {false, "Failed to create shared memory for job data"}; } // Find free job slot for (size_t i = 0; i < MAX_JOBS; ++i) { @@ -861,7 +1041,7 @@ std::pair submit_job_async(const std::vector& reques strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1); job_queue[i].problem_type = is_mip ? 1 : 0; job_queue[i].data_size = request_data.size(); - memcpy(job_queue[i].data, request_data.data(), request_data.size()); + strncpy(job_queue[i].shm_data_name, shm_name.c_str(), sizeof(job_queue[i].shm_data_name) - 1); job_queue[i].worker_pid = 0; job_queue[i].claimed = false; job_queue[i].cancelled = false; @@ -885,6 +1065,8 @@ std::pair submit_job_async(const std::vector& reques } } + // No free slot - cleanup the shm we created + shm_unlink(shm_name.c_str()); return {false, "Job queue full"}; } @@ -1299,7 +1481,7 @@ void handle_client(int client_fd, bool stream_logs) if (!submit_ok) { // Submission failed auto response = serializer->serialize_submit_response(false, job_id_or_error); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); } else if (blocking) { @@ -1318,12 +1500,15 @@ void handle_client(int client_fd, bool stream_logs) // Block on condition variable until job completes bool success = wait_for_result(job_id, result_data, error_message); - // Auto-delete job after returning result (client doesn't need to call DELETE) - delete_job(job_id); + // NOTE: We do NOT auto-delete here. The client should call DELETE_RESULT + // after consuming all logs. This allows the pattern: + // 1. Submit job (blocking=true or async + WAIT_FOR_RESULT) + // 2. Retrieve logs (GET_LOGS) - can be done in parallel thread + // 3. 
Delete job (DELETE_RESULT) when done with logs // Return result response (same format as GET_RESULT) auto response = serializer->serialize_result_response(success, result_data, error_message); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); @@ -1334,7 +1519,7 @@ void handle_client(int client_fd, bool stream_logs) } else { // ASYNC MODE: Return job_id immediately auto response = serializer->serialize_submit_response(true, job_id_or_error); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); } @@ -1355,7 +1540,7 @@ void handle_client(int client_fd, bool stream_logs) auto response = serializer->serialize_status_response(status_code, message); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); } else if (request_type == 2) { // GET_RESULT @@ -1366,7 +1551,7 @@ void handle_client(int client_fd, bool stream_logs) bool success = get_job_result(job_id, result_data, error_message); auto response = serializer->serialize_result_response(success, result_data, error_message); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); } else if (request_type == 3) { // DELETE_RESULT @@ -1375,7 +1560,7 @@ void handle_client(int client_fd, bool stream_logs) auto response = serializer->serialize_delete_response(success); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); } else if (request_type == 4) { // GET_LOGS @@ -1412,7 +1597,7 @@ void handle_client(int client_fd, bool stream_logs) auto response = 
serializer->serialize_logs_response(job_id, log_lines, nbytes, job_exists); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); @@ -1441,7 +1626,7 @@ void handle_client(int client_fd, bool stream_logs) bool success = (result == 0); auto response = serializer->serialize_cancel_response(success, message, status_code); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); @@ -1465,7 +1650,7 @@ void handle_client(int client_fd, bool stream_logs) // Send result response (same format as GET_RESULT) auto response = serializer->serialize_result_response(success, result_data, error_message); - uint32_t size = response.size(); + uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu index 42cc0be61..49c8e600f 100644 --- a/cpp/src/linear_programming/utilities/remote_solve.cu +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -91,8 +91,8 @@ class remote_client_t { { if (sockfd_ < 0) return false; - // Send size first (4 bytes, network byte order) - uint32_t size = static_cast(data.size()); + // Send size first (8 bytes for large problem support) + uint64_t size = static_cast(data.size()); if (!write_all(&size, sizeof(size))) return false; if (!write_all(data.data(), data.size())) return false; return true; @@ -122,12 +122,12 @@ class remote_client_t { return receive_response_legacy(data); } - // Read payload size (4 bytes) - uint32_t payload_size; + // Read payload size (8 bytes for large problem support) + uint64_t payload_size; if (!read_all(&payload_size, sizeof(payload_size))) return false; - // Sanity check - if (payload_size > 100 * 1024 
* 1024) { + // Sanity check - reject messages larger than 16GB + if (payload_size > 16ULL * 1024 * 1024 * 1024) { CUOPT_LOG_ERROR("[remote_solve] Message too large: {} bytes", payload_size); return false; } @@ -170,12 +170,12 @@ class remote_client_t { { if (sockfd_ < 0) return false; - // Read size first - uint32_t size; + // Read size first (8 bytes for large problem support) + uint64_t size; if (!read_all(&size, sizeof(size))) return false; - // Sanity check - if (size > 100 * 1024 * 1024) { + // Sanity check - reject responses larger than 16GB + if (size > 16ULL * 1024 * 1024 * 1024) { CUOPT_LOG_ERROR("[remote_solve] Response too large: {} bytes", size); return false; } From bcf03cb0864d4c601e63c97d82b744747a56759e Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 12:30:24 -0500 Subject: [PATCH 23/37] Fix MIP remote solve result deserialization and UTF-8 warnings - Add is_mip parameter to serialize_result_response to properly distinguish LP vs MIP solutions (protobuf parsing is lenient and was misinterpreting MIP results as LP) - Add get_job_is_mip helper function to retrieve job type from tracker - Change job_id fields from string to bytes in protobuf schema to avoid spurious UTF-8 validation warnings when parsing wrong message types - Update all serialize_result_response call sites to pass is_mip flag Both LP and MIP remote solve now work correctly with Python Problem class. 
--- cpp/cuopt_remote_server.cpp | 25 ++++++++++++++++--- .../utilities/remote_serialization.hpp | 4 ++- .../utilities/cuopt_remote.proto | 6 ++--- .../utilities/protobuf_serializer.cu | 21 +++++++--------- .../serializers/msgpack_serializer.cpp | 5 +++- 5 files changed, 40 insertions(+), 21 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index ac2af7cf4..a004b4f62 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -1105,6 +1105,17 @@ JobStatus check_job_status(const std::string& job_id, std::string& message) return it->second.status; } +// Check if a job is MIP (vs LP) +bool get_job_is_mip(const std::string& job_id) +{ + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it == job_tracker.end()) { + return false; // Default to LP if not found + } + return it->second.is_mip; +} + // Get job result bool get_job_result(const std::string& job_id, std::vector& result_data, @@ -1507,7 +1518,9 @@ void handle_client(int client_fd, bool stream_logs) // 3. 
Delete job (DELETE_RESULT) when done with logs // Return result response (same format as GET_RESULT) - auto response = serializer->serialize_result_response(success, result_data, error_message); + bool job_is_mip = is_mip; // Use the is_mip from the submit request + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); write_all(client_fd, response.data(), response.size()); @@ -1548,8 +1561,10 @@ void handle_client(int client_fd, bool stream_logs) std::vector result_data; std::string error_message; - bool success = get_job_result(job_id, result_data, error_message); - auto response = serializer->serialize_result_response(success, result_data, error_message); + bool success = get_job_result(job_id, result_data, error_message); + bool job_is_mip = get_job_is_mip(job_id); + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); @@ -1648,7 +1663,9 @@ void handle_client(int client_fd, bool stream_logs) bool success = wait_for_result(job_id, result_data, error_message); // Send result response (same format as GET_RESULT) - auto response = serializer->serialize_result_response(success, result_data, error_message); + bool job_is_mip = get_job_is_mip(job_id); + auto response = + serializer->serialize_result_response(success, result_data, error_message, job_is_mip); uint64_t size = response.size(); write_all(client_fd, &size, sizeof(size)); diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp index c7bf61da0..9e0b53cd8 100644 --- a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -371,11 +371,13 @@ class 
remote_serializer_t { * @param success Whether result retrieval succeeded * @param result_data The solution data (if success) * @param error_message Error message (if failure) + * @param is_mip Whether this is a MIP solution (vs LP) * @return Serialized response bytes */ virtual std::vector serialize_result_response(bool success, const std::vector& result_data, - const std::string& error_message) = 0; + const std::string& error_message, + bool is_mip = false) = 0; /** * @brief Serialize a delete response. diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto index cd48f9587..354d9d1b5 100644 --- a/cpp/src/linear_programming/utilities/cuopt_remote.proto +++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto @@ -278,7 +278,7 @@ enum JobStatus { // Generic request wrapper for async operations message AsyncRequest { RequestType request_type = 1; - string job_id = 2; // For status/get/delete/get_logs operations + bytes job_id = 2; // For status/get/delete/get_logs operations (bytes to avoid UTF-8 validation warnings) bool blocking = 3; // If true, server waits and returns solution (sync mode) int64 frombyte = 4; // For GET_LOGS: byte offset to start reading from @@ -292,7 +292,7 @@ message AsyncRequest { // Response for job submission message SubmitResponse { ResponseStatus status = 1; - string job_id = 2; // Unique job identifier + bytes job_id = 2; // Unique job identifier (bytes to avoid UTF-8 validation warnings) string message = 3; // Success/error message } @@ -323,7 +323,7 @@ message DeleteResponse { // Response for log retrieval (file-based, like Python server) message LogsResponse { ResponseStatus status = 1; - string job_id = 2; + bytes job_id = 2; // (bytes to avoid UTF-8 validation warnings) repeated string log_lines = 3; // Log lines read from file int64 nbytes = 4; // Ending byte position (use as frombyte in next request) bool job_exists = 5; // False if job_id not found diff 
--git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu index 28c60664d..ad3c2704c 100644 --- a/cpp/src/linear_programming/utilities/protobuf_serializer.cu +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -689,7 +689,8 @@ class protobuf_serializer_t : public remote_serializer_t { std::vector serialize_result_response(bool success, const std::vector& result_data, - const std::string& error_message) override + const std::string& error_message, + bool is_mip = false) override { cuopt::remote::AsyncResponse response; response.set_request_type(cuopt::remote::GET_RESULT); @@ -698,21 +699,17 @@ class protobuf_serializer_t : public remote_serializer_t { if (success) { result->set_status(cuopt::remote::SUCCESS); - // The result_data is the raw serialized solution (LP or MIP) - // We need to wrap it properly. For simplicity, we'll embed it as raw bytes - // and the client will know to parse it based on the original request type. - // Actually, we should try to parse it and embed properly. 
- - // Try to parse as LP solution first - cuopt::remote::LPSolution lp_sol; - if (lp_sol.ParseFromArray(result_data.data(), result_data.size())) { - result->mutable_lp_solution()->CopyFrom(lp_sol); - } else { - // Try MIP solution + // Parse and embed the solution based on problem type + if (is_mip) { cuopt::remote::MIPSolution mip_sol; if (mip_sol.ParseFromArray(result_data.data(), result_data.size())) { result->mutable_mip_solution()->CopyFrom(mip_sol); } + } else { + cuopt::remote::LPSolution lp_sol; + if (lp_sol.ParseFromArray(result_data.data(), result_data.size())) { + result->mutable_lp_solution()->CopyFrom(lp_sol); + } } } else { result->set_status(cuopt::remote::ERROR_INTERNAL); diff --git a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp index f01b78547..018382df5 100644 --- a/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp +++ b/cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp @@ -1072,9 +1072,12 @@ class msgpack_serializer_t : public remote_serializer_t { std::vector serialize_result_response(bool success, const std::vector& result_data, - const std::string& error_message) override + const std::string& error_message, + bool is_mip = false) override { // For result response, we prepend success flag then the actual solution data + // Note: is_mip flag is not needed here as msgpack just passes through raw data + (void)is_mip; // Unused but required for interface compatibility msgpack::sbuffer buffer; msgpack::packer pk(&buffer); pk.pack(success); From 45815ca518e9619ec736c0c9abaa64fdaffb1066 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 13:10:32 -0500 Subject: [PATCH 24/37] Add pipe-based IPC as default, shm as option (--use-shm) Pipes are container-friendly (no /dev/shm size limits or volume mounts needed). Shared memory may be slightly faster for very large problems. 
Changes: - Add WorkerPipes struct with pipe file descriptors - Add worker_index, data_sent fields to JobQueueEntry for pipe sync - Add worker_index field to ResultQueueEntry for pipe mode - Add pending_job_data map for storing data before worker claims job - Add pipe I/O helpers: write_to_pipe, read_from_pipe, send_job_data_pipe, recv_job_data_pipe, send_result_pipe, recv_result_pipe - Modify spawn_workers to create pipes before forking - Modify spawn_single_worker to recreate pipes when restarting workers - Modify worker_process to read job data from pipe or shm based on mode - Modify result_retrieval_thread to send job data to workers (pipe mode) and read results from pipes - Add --use-shm command line option - Display IPC mode in server startup message Tested: Both pipe mode (default) and shm mode work correctly with LP and MIP remote solve via Python Problem class. --- cpp/cuopt_remote_server.cpp | 592 ++++++++++++++++++++++++++++++------ 1 file changed, 502 insertions(+), 90 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index a004b4f62..4f666f530 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -61,27 +61,32 @@ using namespace cuopt::linear_programming; constexpr size_t MAX_JOBS = 100; constexpr size_t MAX_RESULTS = 100; -// Job queue entry - small fixed size, data stored in separate per-job shared memory +// Job queue entry - small fixed size, data stored in separate per-job shared memory or sent via +// pipe struct JobQueueEntry { char job_id[64]; uint32_t problem_type; // 0 = LP, 1 = MIP uint64_t data_size; // Size of problem data (uint64 for large problems) - char shm_data_name[128]; // Name of per-job shared memory segment + char shm_data_name[128]; // Name of per-job shared memory segment (shm mode only) std::atomic ready; // Job is ready to be processed std::atomic claimed; // Worker has claimed this job std::atomic worker_pid; // PID of worker that claimed this job (0 if none) 
std::atomic cancelled; // Job has been cancelled (worker should skip) + // Pipe mode fields + std::atomic worker_index; // Index of worker that claimed this job (-1 if none) + std::atomic data_sent; // Server has sent data to worker's pipe (pipe mode) }; -// Result queue entry - small fixed size, data stored in separate per-result shared memory +// Result queue entry - small fixed size, data stored in separate per-result shared memory or pipe struct ResultQueueEntry { char job_id[64]; - uint32_t status; // 0 = success, 1 = error + uint32_t status; // 0 = success, 1 = error, 2 = cancelled uint64_t data_size; // Size of result data (uint64 for large results) - char shm_data_name[128]; // Name of per-result shared memory segment + char shm_data_name[128]; // Name of per-result shared memory segment (shm mode only) char error_message[1024]; - std::atomic ready; // Result is ready - std::atomic retrieved; // Result has been retrieved + std::atomic ready; // Result is ready + std::atomic retrieved; // Result has been retrieved + std::atomic worker_index; // Index of worker that produced this result (pipe mode) }; // Shared memory control block @@ -252,10 +257,25 @@ struct ServerConfig { int num_workers = 1; bool verbose = true; bool stream_logs = true; + bool use_pipes = true; // Default to pipes (container-friendly), --use-shm to disable }; ServerConfig config; +// Worker state for pipe-based IPC +struct WorkerPipes { + int to_worker_fd; // Server writes job data to this (pipe write end) + int from_worker_fd; // Server reads results from this (pipe read end) + int worker_read_fd; // Worker reads job data from this (inherited, closed in parent) + int worker_write_fd; // Worker writes results to this (inherited, closed in parent) +}; + +std::vector worker_pipes; + +// Pending job data for pipe mode (job_id -> serialized data) +std::mutex pending_data_mutex; +std::map> pending_job_data; + // Shared memory names const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; const char* 
SHM_RESULT_QUEUE = "/cuopt_result_queue"; @@ -377,10 +397,12 @@ bool init_shared_memory() // Initialize job queue entries for (size_t i = 0; i < MAX_JOBS; ++i) { - job_queue[i].ready = false; - job_queue[i].claimed = false; - job_queue[i].worker_pid = 0; - job_queue[i].cancelled = false; + job_queue[i].ready = false; + job_queue[i].claimed = false; + job_queue[i].worker_pid = 0; + job_queue[i].cancelled = false; + job_queue[i].worker_index = -1; + job_queue[i].data_sent = false; } // Create result queue shared memory @@ -405,8 +427,9 @@ bool init_shared_memory() // Initialize result queue entries for (size_t i = 0; i < MAX_RESULTS; ++i) { - result_queue[i].ready = false; - result_queue[i].retrieved = false; + result_queue[i].ready = false; + result_queue[i].retrieved = false; + result_queue[i].worker_index = -1; } // Create control shared memory @@ -457,6 +480,16 @@ std::string get_log_file_path(const std::string& job_id); void ensure_log_dir_exists(); void delete_log_file(const std::string& job_id); +// ============================================================================ +// Forward declarations for pipe I/O helpers +// ============================================================================ +static bool write_to_pipe(int fd, const void* data, size_t size); +static bool read_from_pipe(int fd, void* data, size_t size); +static bool send_job_data_pipe(int worker_idx, const std::vector& data); +static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data); +static bool send_result_pipe(int fd, const std::vector& data); +static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector& data); + // ============================================================================ // Worker Process // ============================================================================ @@ -482,8 +515,9 @@ void worker_process(int worker_id) // Try to claim this job atomically bool expected = false; if 
(job_queue[i].claimed.compare_exchange_strong(expected, true)) { - job_queue[i].worker_pid = getpid(); // Record our PID - job_slot = i; + job_queue[i].worker_pid = getpid(); // Record our PID + job_queue[i].worker_index = worker_id; // Record worker index for pipe mode + job_slot = i; break; } } @@ -506,8 +540,8 @@ void worker_process(int worker_id) << "\n"; std::cout.flush(); - // Cleanup job input shm - cleanup_job_shm(job.shm_data_name); + // Cleanup job input shm (shm mode only) + if (!config.use_pipes) { cleanup_job_shm(job.shm_data_name); } // Store cancelled result in result queue for (size_t i = 0; i < MAX_RESULTS; ++i) { @@ -516,6 +550,7 @@ void worker_process(int worker_id) result_queue[i].status = 2; // Cancelled status result_queue[i].data_size = 0; result_queue[i].shm_data_name[0] = '\0'; + result_queue[i].worker_index = worker_id; // For pipe mode strncpy(result_queue[i].error_message, "Job was cancelled", sizeof(result_queue[i].error_message) - 1); @@ -526,10 +561,12 @@ void worker_process(int worker_id) } // Clear job slot (don't exit/restart worker) - job.worker_pid = 0; - job.ready = false; - job.claimed = false; - job.cancelled = false; + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; + job.cancelled = false; continue; // Go back to waiting for next job } @@ -574,10 +611,41 @@ void worker_process(int worker_id) write(STDOUT_FILENO, msg01, 44); fsync(STDOUT_FILENO); - // Read problem data from per-job shared memory + // Read problem data (pipe mode or shm mode) std::vector request_data; - if (!read_job_shm(job.shm_data_name, job.data_size, request_data)) { - std::cerr << "[Worker " << worker_id << "] Failed to read job data from shm\n"; + bool read_success = false; + + if (config.use_pipes) { + // Pipe mode: wait for server to send data, then read from pipe + // Wait for data_sent flag with timeout + int wait_count = 0; + while (!job.data_sent && !job.cancelled && 
!shm_ctrl->shutdown_requested) { + usleep(1000); // 1ms + if (++wait_count > 30000) { // 30 second timeout + std::cerr << "[Worker " << worker_id << "] Timeout waiting for job data\n"; + break; + } + } + + if (job.data_sent && !job.cancelled) { + // Read from our input pipe + int read_fd = worker_pipes[worker_id].worker_read_fd; + read_success = recv_job_data_pipe(read_fd, job.data_size, request_data); + if (!read_success) { + std::cerr << "[Worker " << worker_id << "] Failed to read job data from pipe\n"; + } + } + } else { + // SHM mode: read from shared memory + read_success = read_job_shm(job.shm_data_name, job.data_size, request_data); + if (!read_success) { + std::cerr << "[Worker " << worker_id << "] Failed to read job data from shm\n"; + } + // Cleanup job input shm now that we've read it + cleanup_job_shm(job.shm_data_name); + } + + if (!read_success) { // Store error result for (size_t i = 0; i < MAX_RESULTS; ++i) { if (!result_queue[i].ready) { @@ -585,25 +653,24 @@ void worker_process(int worker_id) result_queue[i].status = 1; // Error status result_queue[i].data_size = 0; result_queue[i].shm_data_name[0] = '\0'; + result_queue[i].worker_index = worker_id; strncpy(result_queue[i].error_message, - "Failed to read job data from shared memory", + "Failed to read job data", sizeof(result_queue[i].error_message) - 1); result_queue[i].retrieved = false; result_queue[i].ready = true; break; } } - // Cleanup job shm and slot - cleanup_job_shm(job.shm_data_name); - job.worker_pid = 0; - job.ready = false; - job.claimed = false; + // Clear job slot + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; continue; } - // Cleanup job input shm now that we've read it - cleanup_job_shm(job.shm_data_name); - std::vector result_data; std::string error_message; bool success = false; @@ -652,46 +719,81 @@ void worker_process(int worker_id) close(saved_stdout); close(saved_stderr); - // Store result in result 
queue with per-result shared memory - for (size_t i = 0; i < MAX_RESULTS; ++i) { - if (!result_queue[i].ready) { - ResultQueueEntry& result = result_queue[i]; - strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); - result.status = success ? 0 : 1; - if (success && !result_data.empty()) { - // Create per-result shared memory - std::string shm_name = write_result_shm(job_id, result_data); - if (shm_name.empty()) { - // Failed to create shm - report error - result.status = 1; + // Store result (pipe mode: write to pipe, shm mode: write to shared memory) + if (config.use_pipes) { + // Pipe mode: write result to output pipe + if (success && !result_data.empty()) { + int write_fd = worker_pipes[worker_id].worker_write_fd; + bool write_success = send_result_pipe(write_fd, result_data); + if (!write_success) { + std::cerr << "[Worker " << worker_id << "] Failed to write result to pipe\n"; + success = false; + error_message = "Failed to write result to pipe"; + } + } + + // Store result metadata in result queue + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + ResultQueueEntry& result = result_queue[i]; + strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); + result.status = success ? 0 : 1; + result.data_size = success ? result_data.size() : 0; + result.shm_data_name[0] = '\0'; // Not used in pipe mode + result.worker_index = worker_id; + if (!success) { + strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); + } + result.retrieved = false; + result.ready = true; // Mark as ready last + break; + } + } + } else { + // SHM mode: store result in shared memory + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + ResultQueueEntry& result = result_queue[i]; + strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); + result.status = success ? 
0 : 1; + result.worker_index = worker_id; + if (success && !result_data.empty()) { + // Create per-result shared memory + std::string shm_name = write_result_shm(job_id, result_data); + if (shm_name.empty()) { + // Failed to create shm - report error + result.status = 1; + result.data_size = 0; + result.shm_data_name[0] = '\0'; + strncpy(result.error_message, + "Failed to create shared memory for result", + sizeof(result.error_message) - 1); + } else { + result.data_size = result_data.size(); + strncpy(result.shm_data_name, shm_name.c_str(), sizeof(result.shm_data_name) - 1); + } + } else if (!success) { + strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); result.data_size = 0; result.shm_data_name[0] = '\0'; - strncpy(result.error_message, - "Failed to create shared memory for result", - sizeof(result.error_message) - 1); } else { - result.data_size = result_data.size(); - strncpy(result.shm_data_name, shm_name.c_str(), sizeof(result.shm_data_name) - 1); + result.data_size = 0; + result.shm_data_name[0] = '\0'; } - } else if (!success) { - strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); - result.data_size = 0; - result.shm_data_name[0] = '\0'; - } else { - result.data_size = 0; - result.shm_data_name[0] = '\0'; + result.retrieved = false; + result.ready = true; // Mark as ready last + break; } - result.retrieved = false; - result.ready = true; // Mark as ready last - break; } } // Clear job slot - job.worker_pid = 0; - job.ready = false; - job.claimed = false; - job.cancelled = false; + job.worker_pid = 0; + job.worker_index = -1; + job.data_sent = false; + job.ready = false; + job.claimed = false; + job.cancelled = false; std::cout << "[Worker " << worker_id << "] Completed job: " << job_id << " (success: " << success << ")\n"; @@ -702,19 +804,111 @@ void worker_process(int worker_id) _exit(0); } +// Create pipes for a worker (pipe mode only) +bool create_worker_pipes(int worker_id) +{ + 
if (!config.use_pipes) return true; + + // Ensure worker_pipes has enough slots + while (static_cast(worker_pipes.size()) <= worker_id) { + worker_pipes.push_back({-1, -1, -1, -1}); + } + + WorkerPipes& wp = worker_pipes[worker_id]; + + // Create pipe for server -> worker data + int input_pipe[2]; + if (pipe(input_pipe) < 0) { + std::cerr << "[Server] Failed to create input pipe for worker " << worker_id << "\n"; + return false; + } + wp.worker_read_fd = input_pipe[0]; // Worker reads from this + wp.to_worker_fd = input_pipe[1]; // Server writes to this + + // Create pipe for worker -> server results + int output_pipe[2]; + if (pipe(output_pipe) < 0) { + std::cerr << "[Server] Failed to create output pipe for worker " << worker_id << "\n"; + close(input_pipe[0]); + close(input_pipe[1]); + return false; + } + wp.from_worker_fd = output_pipe[0]; // Server reads from this + wp.worker_write_fd = output_pipe[1]; // Worker writes to this + + return true; +} + +// Close server-side pipe ends for a worker (called when restarting) +void close_worker_pipes_server(int worker_id) +{ + if (!config.use_pipes) return; + if (worker_id < 0 || worker_id >= static_cast(worker_pipes.size())) return; + + WorkerPipes& wp = worker_pipes[worker_id]; + if (wp.to_worker_fd >= 0) { + close(wp.to_worker_fd); + wp.to_worker_fd = -1; + } + if (wp.from_worker_fd >= 0) { + close(wp.from_worker_fd); + wp.from_worker_fd = -1; + } +} + +// Close worker-side pipe ends in parent after fork +void close_worker_pipes_child_ends(int worker_id) +{ + if (!config.use_pipes) return; + if (worker_id < 0 || worker_id >= static_cast(worker_pipes.size())) return; + + WorkerPipes& wp = worker_pipes[worker_id]; + if (wp.worker_read_fd >= 0) { + close(wp.worker_read_fd); + wp.worker_read_fd = -1; + } + if (wp.worker_write_fd >= 0) { + close(wp.worker_write_fd); + wp.worker_write_fd = -1; + } +} + void spawn_workers() { for (int i = 0; i < config.num_workers; ++i) { + // Create pipes before forking (pipe mode) + if 
(config.use_pipes && !create_worker_pipes(i)) { + std::cerr << "[Server] Failed to create pipes for worker " << i << "\n"; + continue; + } + pid_t pid = fork(); if (pid < 0) { std::cerr << "[Server] Failed to fork worker " << i << "\n"; + close_worker_pipes_server(i); } else if (pid == 0) { // Child process + if (config.use_pipes) { + // Close all other workers' pipe fds + for (int j = 0; j < static_cast(worker_pipes.size()); ++j) { + if (j != i) { + if (worker_pipes[j].worker_read_fd >= 0) close(worker_pipes[j].worker_read_fd); + if (worker_pipes[j].worker_write_fd >= 0) close(worker_pipes[j].worker_write_fd); + if (worker_pipes[j].to_worker_fd >= 0) close(worker_pipes[j].to_worker_fd); + if (worker_pipes[j].from_worker_fd >= 0) close(worker_pipes[j].from_worker_fd); + } + } + // Close server ends of our pipes + close(worker_pipes[i].to_worker_fd); + close(worker_pipes[i].from_worker_fd); + } worker_process(i); _exit(0); // Should not reach here } else { // Parent process worker_pids.push_back(pid); + // Close worker ends of pipes (parent doesn't need them) + close_worker_pipes_child_ends(i); } } } @@ -731,15 +925,43 @@ void wait_for_workers() // Spawn a single replacement worker and return its PID pid_t spawn_single_worker(int worker_id) { + // Create new pipes for the replacement worker (pipe mode) + if (config.use_pipes) { + // Close old pipes first + close_worker_pipes_server(worker_id); + if (!create_worker_pipes(worker_id)) { + std::cerr << "[Server] Failed to create pipes for replacement worker " << worker_id << "\n"; + return -1; + } + } + pid_t pid = fork(); if (pid < 0) { std::cerr << "[Server] Failed to fork replacement worker " << worker_id << "\n"; + close_worker_pipes_server(worker_id); return -1; } else if (pid == 0) { // Child process + if (config.use_pipes) { + // Close all other workers' pipe fds + for (int j = 0; j < static_cast(worker_pipes.size()); ++j) { + if (j != worker_id) { + if (worker_pipes[j].worker_read_fd >= 0) 
close(worker_pipes[j].worker_read_fd); + if (worker_pipes[j].worker_write_fd >= 0) close(worker_pipes[j].worker_write_fd); + if (worker_pipes[j].to_worker_fd >= 0) close(worker_pipes[j].to_worker_fd); + if (worker_pipes[j].from_worker_fd >= 0) close(worker_pipes[j].from_worker_fd); + } + } + // Close server ends of our pipes + close(worker_pipes[worker_id].to_worker_fd); + close(worker_pipes[worker_id].from_worker_fd); + } worker_process(worker_id); _exit(0); // Should not reach here } + + // Parent: close worker ends of new pipes + close_worker_pipes_child_ends(worker_id); return pid; } @@ -759,8 +981,15 @@ void mark_worker_jobs_failed(pid_t dead_worker_pid) << " died while processing job: " << job_id << "\n"; } - // Cleanup job input shm (worker may not have done it) - cleanup_job_shm(job_queue[i].shm_data_name); + // Cleanup job data + if (config.use_pipes) { + // Pipe mode: remove from pending data if not yet sent + std::lock_guard lock(pending_data_mutex); + pending_job_data.erase(job_id); + } else { + // SHM mode: cleanup job input shm (worker may not have done it) + cleanup_job_shm(job_queue[i].shm_data_name); + } // Store result in result queue (cancelled or failed) for (size_t j = 0; j < MAX_RESULTS; ++j) { @@ -769,6 +998,7 @@ void mark_worker_jobs_failed(pid_t dead_worker_pid) result_queue[j].status = was_cancelled ? 2 : 1; // 2=cancelled, 1=error result_queue[j].data_size = 0; result_queue[j].shm_data_name[0] = '\0'; + result_queue[j].worker_index = -1; strncpy(result_queue[j].error_message, was_cancelled ? 
"Job was cancelled" : "Worker process died unexpectedly", sizeof(result_queue[j].error_message) - 1); @@ -779,10 +1009,12 @@ void mark_worker_jobs_failed(pid_t dead_worker_pid) } // Clear the job slot - job_queue[i].worker_pid = 0; - job_queue[i].ready = false; - job_queue[i].claimed = false; - job_queue[i].cancelled = false; + job_queue[i].worker_pid = 0; + job_queue[i].worker_index = -1; + job_queue[i].data_sent = false; + job_queue[i].ready = false; + job_queue[i].claimed = false; + job_queue[i].cancelled = false; // Update job tracker { @@ -868,6 +1100,7 @@ void worker_monitor_thread() // ============================================================================ // Result Retrieval Thread (main process) +// Also handles sending job data to workers in pipe mode // ============================================================================ void result_retrieval_thread() @@ -877,6 +1110,46 @@ void result_retrieval_thread() while (keep_running) { bool found = false; + // PIPE MODE: Check for jobs that need data sent to workers + if (config.use_pipes) { + for (size_t i = 0; i < MAX_JOBS; ++i) { + if (job_queue[i].ready && job_queue[i].claimed && !job_queue[i].data_sent && + !job_queue[i].cancelled) { + std::string job_id(job_queue[i].job_id); + int worker_idx = job_queue[i].worker_index; + + if (worker_idx >= 0) { + // Get pending job data + std::vector job_data; + { + std::lock_guard lock(pending_data_mutex); + auto it = pending_job_data.find(job_id); + if (it != pending_job_data.end()) { + job_data = std::move(it->second); + pending_job_data.erase(it); + } + } + + if (!job_data.empty()) { + // Send data to worker's pipe + if (send_job_data_pipe(worker_idx, job_data)) { + job_queue[i].data_sent = true; + if (config.verbose) { + std::cout << "[Server] Sent " << job_data.size() << " bytes to worker " + << worker_idx << " for job " << job_id << "\n"; + } + } else { + std::cerr << "[Server] Failed to send job data to worker " << worker_idx << "\n"; + // Mark job as 
failed + job_queue[i].cancelled = true; + } + found = true; + } + } + } + } + } + // Check for completed results for (size_t i = 0; i < MAX_RESULTS; ++i) { if (result_queue[i].ready && !result_queue[i].retrieved) { @@ -884,19 +1157,28 @@ void result_retrieval_thread() uint32_t result_status = result_queue[i].status; bool success = (result_status == 0); bool cancelled = (result_status == 2); + int worker_idx = result_queue[i].worker_index; std::vector result_data; std::string error_message; if (success && result_queue[i].data_size > 0) { - // Read result data from per-result shared memory - if (!read_job_shm( - result_queue[i].shm_data_name, result_queue[i].data_size, result_data)) { - error_message = "Failed to read result data from shared memory"; - success = false; + if (config.use_pipes) { + // Pipe mode: read result from worker's output pipe + if (!recv_result_pipe(worker_idx, result_queue[i].data_size, result_data)) { + error_message = "Failed to read result data from pipe"; + success = false; + } + } else { + // SHM mode: read from shared memory + if (!read_job_shm( + result_queue[i].shm_data_name, result_queue[i].data_size, result_data)) { + error_message = "Failed to read result data from shared memory"; + success = false; + } + // Cleanup result shm after reading + cleanup_job_shm(result_queue[i].shm_data_name); } - // Cleanup result shm after reading - cleanup_job_shm(result_queue[i].shm_data_name); } else if (!success) { error_message = result_queue[i].error_message; } @@ -934,9 +1216,10 @@ void result_retrieval_thread() } } - result_queue[i].retrieved = true; - result_queue[i].ready = false; // Free slot - found = true; + result_queue[i].retrieved = true; + result_queue[i].worker_index = -1; + result_queue[i].ready = false; // Free slot + found = true; } } @@ -1026,14 +1309,124 @@ static void cleanup_job_shm(const char* shm_name) if (shm_name[0] != '\0') { shm_unlink(shm_name); } } +// 
============================================================================ +// Pipe I/O Helpers +// ============================================================================ + +// Write all data to a pipe (handles partial writes) +static bool write_to_pipe(int fd, const void* data, size_t size) +{ + const uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t written = ::write(fd, ptr, remaining); + if (written <= 0) { + if (errno == EINTR) continue; + return false; + } + ptr += written; + remaining -= written; + } + return true; +} + +// Read all data from a pipe (handles partial reads) +static bool read_from_pipe(int fd, void* data, size_t size) +{ + uint8_t* ptr = static_cast(data); + size_t remaining = size; + while (remaining > 0) { + ssize_t nread = ::read(fd, ptr, remaining); + if (nread <= 0) { + if (errno == EINTR) continue; + return false; + } + ptr += nread; + remaining -= nread; + } + return true; +} + +// Send job data to worker via pipe (length-prefixed) +static bool send_job_data_pipe(int worker_idx, const std::vector& data) +{ + if (worker_idx < 0 || worker_idx >= static_cast(worker_pipes.size())) { return false; } + int fd = worker_pipes[worker_idx].to_worker_fd; + if (fd < 0) return false; + + // Send size first + uint64_t size = data.size(); + if (!write_to_pipe(fd, &size, sizeof(size))) return false; + // Send data + if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false; + return true; +} + +// Receive job data from pipe (length-prefixed) - called by worker +static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data) +{ + // Read size + uint64_t size; + if (!read_from_pipe(fd, &size, sizeof(size))) return false; + if (size != expected_size) { + std::cerr << "[Worker] Size mismatch: expected " << expected_size << ", got " << size << "\n"; + return false; + } + // Read data + data.resize(size); + if (size > 0 && !read_from_pipe(fd, data.data(), size)) return 
false; + return true; +} + +// Send result data to server via pipe (length-prefixed) - called by worker +static bool send_result_pipe(int fd, const std::vector<uint8_t>& data) +{ + // Send size first + uint64_t size = data.size(); + if (!write_to_pipe(fd, &size, sizeof(size))) return false; + // Send data + if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false; + return true; +} + +// Receive result data from worker via pipe (length-prefixed) +static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector<uint8_t>& data) +{ + if (worker_idx < 0 || worker_idx >= static_cast<int>(worker_pipes.size())) { return false; } + int fd = worker_pipes[worker_idx].from_worker_fd; + if (fd < 0) return false; + + // Read size + uint64_t size; + if (!read_from_pipe(fd, &size, sizeof(size))) return false; + if (size != expected_size) { + std::cerr << "[Server] Result size mismatch: expected " << expected_size << ", got " << size + << "\n"; + return false; + } + // Read data + data.resize(size); + if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false; + return true; +} + // Submit a job asynchronously (returns job_id) std::pair<bool, std::string> submit_job_async(const std::vector<uint8_t>& request_data, bool is_mip) { std::string job_id = generate_job_id(); - // Create per-job shared memory for problem data - std::string shm_name = create_job_shm(job_id, request_data, "job"); - if (shm_name.empty()) { return {false, "Failed to create shared memory for job data"}; } + std::string shm_name; + if (config.use_pipes) { + // Pipe mode: store data in pending map (will be sent when worker claims job) + { + std::lock_guard lock(pending_data_mutex); + pending_job_data[job_id] = request_data; + } + } else { + // SHM mode: create per-job shared memory for problem data + shm_name = create_job_shm(job_id, request_data, "job"); + if (shm_name.empty()) { return {false, "Failed to create shared memory for job data"}; } + } // Find free job slot + for (size_t i = 0; i < MAX_JOBS; ++i) { @@ 
-1041,11 +1434,18 @@ std::pair submit_job_async(const std::vector& reques strncpy(job_queue[i].job_id, job_id.c_str(), sizeof(job_queue[i].job_id) - 1); job_queue[i].problem_type = is_mip ? 1 : 0; job_queue[i].data_size = request_data.size(); - strncpy(job_queue[i].shm_data_name, shm_name.c_str(), sizeof(job_queue[i].shm_data_name) - 1); - job_queue[i].worker_pid = 0; - job_queue[i].claimed = false; - job_queue[i].cancelled = false; - job_queue[i].ready = true; // Mark as ready last + if (!config.use_pipes) { + strncpy( + job_queue[i].shm_data_name, shm_name.c_str(), sizeof(job_queue[i].shm_data_name) - 1); + } else { + job_queue[i].shm_data_name[0] = '\0'; + } + job_queue[i].worker_pid = 0; + job_queue[i].worker_index = -1; + job_queue[i].data_sent = false; + job_queue[i].claimed = false; + job_queue[i].cancelled = false; + job_queue[i].ready = true; // Mark as ready last // Track job { @@ -1065,8 +1465,13 @@ std::pair submit_job_async(const std::vector& reques } } - // No free slot - cleanup the shm we created - shm_unlink(shm_name.c_str()); + // No free slot - cleanup + if (config.use_pipes) { + std::lock_guard lock(pending_data_mutex); + pending_job_data.erase(job_id); + } else { + shm_unlink(shm_name.c_str()); + } return {false, "Job queue full"}; } @@ -1697,6 +2102,9 @@ void print_usage(const char* prog) << " -w NUM Number of worker processes (default: 1)\n" << " -q Quiet mode (less verbose output)\n" << " --no-stream Disable real-time log streaming to clients\n" + << " --use-shm Use POSIX shared memory for IPC (default: pipes)\n" + << " Pipes are container-friendly; shm may be faster but\n" + << " requires /dev/shm with sufficient size\n" << " -h Show this help\n" << "\n" << "Environment Variables (client-side):\n" @@ -1715,6 +2123,8 @@ int main(int argc, char** argv) config.verbose = false; } else if (strcmp(argv[i], "--no-stream") == 0) { config.stream_logs = false; + } else if (strcmp(argv[i], "--use-shm") == 0) { + config.use_pipes = false; // Use shared 
memory instead of pipes } else if (strcmp(argv[i], "-h") == 0) { print_usage(argv[0]); return 0; @@ -1736,6 +2146,8 @@ int main(int argc, char** argv) std::cout << "Port: " << config.port << "\n"; std::cout << "Workers: " << config.num_workers << " (processes)\n"; std::cout << "Log streaming: " << (config.stream_logs ? "enabled" : "disabled") << "\n"; + std::cout << "IPC mode: " << (config.use_pipes ? "pipes (container-friendly)" : "shared memory") + << "\n"; std::cout << "\n"; std::cout << "Async API:\n"; std::cout << " SUBMIT_JOB - Submit a job, get job_id\n"; From 52f237961fc90ec4d7283086f64c9ac42f23df8b Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 13:25:19 -0500 Subject: [PATCH 25/37] Ignore SIGPIPE to prevent server crash when worker is killed When a running job is cancelled, the worker is killed with SIGKILL which closes its pipe ends. If the server is in the middle of writing to the worker's input pipe, it receives SIGPIPE which would terminate the server. By ignoring SIGPIPE, the write() call returns -1 with errno=EPIPE instead, which is handled gracefully (logged as 'Failed to send job data'). Tested: Both pipe mode and shm mode work correctly with job cancellation and worker restart. 
--- cpp/cuopt_remote_server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 4f666f530..25c42cbee 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -2134,6 +2134,7 @@ int main(int argc, char** argv) // Set up signal handlers signal(SIGINT, signal_handler); signal(SIGTERM, signal_handler); + signal(SIGPIPE, SIG_IGN); // Ignore SIGPIPE (broken pipe) - happens when writing to closed pipes // IMPORTANT: Clear remote solve environment variables to prevent infinite recursion unsetenv("CUOPT_REMOTE_HOST"); From 018ca115512ba99e4b5dda5295d0e25861b51809 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 12 Jan 2026 13:27:50 -0500 Subject: [PATCH 26/37] Add dev_scripts directory for development test scripts These scripts are for manual testing during development and won't be run by CI (pytest looks in specific directories like python/cuopt/). - test_pipe_cancel_restart.py: Tests job cancellation and worker restart - test_python_problem_remote.py: Tests Python Problem class with remote solve --- dev_scripts/test_pipe_cancel_restart.py | 288 ++++++++++++++++++++++ dev_scripts/test_python_problem_remote.py | 179 ++++++++++++++ 2 files changed, 467 insertions(+) create mode 100644 dev_scripts/test_pipe_cancel_restart.py create mode 100644 dev_scripts/test_python_problem_remote.py diff --git a/dev_scripts/test_pipe_cancel_restart.py b/dev_scripts/test_pipe_cancel_restart.py new file mode 100644 index 000000000..963d382d6 --- /dev/null +++ b/dev_scripts/test_pipe_cancel_restart.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test job cancellation with pipe mode and verify worker restart/pipe recreation. + +This test: +1. Submits a long-running LP job (with many iterations) +2. Cancels it while running +3. Verifies the cancellation succeeded +4. 
Submits a simple job and verifies it completes (tests pipe recreation) +""" + +import socket +import struct +import time + +# Import the generated protobuf module +import sys + +sys.path.insert(0, "/home/tmckay/repos/nvidia-cuopt/cpp/build") +import cuopt_remote_pb2 + +HOST = "localhost" +PORT = 8765 + + +def send_recv(sock, data): + """Send request and receive response with uint64 size prefix.""" + # Send size (uint64) + data + sock.sendall(struct.pack("= 1 + for j in range(10): + var_idx = (i + j) % n_vars + indices.append(var_idx) + values.append(1.0) + offsets.append(len(indices)) + + prob.A_offsets.extend(offsets) + prob.A_indices.extend(indices) + prob.A.extend(values) + + # Row types: all >= constraints + prob.row_types = bytes([ord("G")] * n_constraints) + + # Settings to make it run longer + lp.settings.log_to_console = True + lp.settings.time_limit = 60.0 # Allow up to 60 seconds + + return req.SerializeToString() + + +def create_simple_lp(): + """Create a simple LP that solves quickly.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.SUBMIT_JOB + req.blocking = False + + lp = req.lp_request + prob = lp.problem + prob.maximize = False + + # Simple: minimize x + 2y subject to x + y >= 1, x,y >= 0 + prob.c.extend([1.0, 2.0]) + prob.variable_lower_bounds.extend([0.0, 0.0]) + prob.variable_upper_bounds.extend([float("inf"), float("inf")]) + prob.constraint_lower_bounds.extend([1.0]) + prob.constraint_upper_bounds.extend([float("inf")]) + prob.A_offsets.extend([0, 2]) + prob.A_indices.extend([0, 1]) + prob.A.extend([1.0, 1.0]) + prob.row_types = bytes([ord("G")]) + lp.settings.log_to_console = True + + return req.SerializeToString() + + +def submit_job(sock, request_data): + """Submit a job and return the job_id.""" + response_data = send_recv(sock, request_data) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + if response.submit_response.status != cuopt_remote_pb2.SUCCESS: + raise 
Exception(f"Submit failed: {response.submit_response.message}") + + return ( + response.submit_response.job_id.decode() + if isinstance(response.submit_response.job_id, bytes) + else response.submit_response.job_id + ) + + +def check_status(sock, job_id): + """Check job status.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.CHECK_STATUS + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + return ( + response.status_response.job_status, + response.status_response.message, + ) + + +def cancel_job(sock, job_id): + """Cancel a job.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.CANCEL_JOB + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + success = response.cancel_response.status == cuopt_remote_pb2.SUCCESS + return success, response.cancel_response.message + + +def wait_for_result(sock, job_id): + """Wait for job to complete using WAIT_FOR_RESULT.""" + req = cuopt_remote_pb2.AsyncRequest() + req.request_type = cuopt_remote_pb2.WAIT_FOR_RESULT + req.job_id = job_id.encode() if isinstance(job_id, str) else job_id + + response_data = send_recv(sock, req.SerializeToString()) + response = cuopt_remote_pb2.AsyncResponse() + response.ParseFromString(response_data) + + return ( + response.result_response.status == cuopt_remote_pb2.SUCCESS, + response, + ) + + +def main(): + print("=" * 60) + print("Test: Job Cancellation with Pipe Mode + Worker Restart") + print("=" * 60) + + # Step 1: Submit a long-running job + print("\n[1] Submitting long-running LP job...") + sock1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock1.connect((HOST, PORT)) + + long_job_data = 
create_long_running_lp() + job_id = submit_job(sock1, long_job_data) + print(f" Job submitted: {job_id}") + sock1.close() + + # Step 2: Wait a bit for job to start processing + print("\n[2] Waiting for job to start processing...") + time.sleep(2) + + sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock2.connect((HOST, PORT)) + status, msg = check_status(sock2, job_id) + print(f" Status: {cuopt_remote_pb2.JobStatus.Name(status)} - {msg}") + sock2.close() + + # Step 3: Cancel the job + print("\n[3] Cancelling the job...") + sock3 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock3.connect((HOST, PORT)) + success, msg = cancel_job(sock3, job_id) + print(f" Cancel result: success={success}, message={msg}") + sock3.close() + + # Step 4: Verify job is cancelled + print("\n[4] Verifying job status after cancellation...") + time.sleep( + 1 + ) # Give time for worker to be killed and result to be recorded + + sock4 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock4.connect((HOST, PORT)) + status, msg = check_status(sock4, job_id) + print(f" Status: {cuopt_remote_pb2.JobStatus.Name(status)} - {msg}") + sock4.close() + + if status != cuopt_remote_pb2.CANCELLED: + print( + f" WARNING: Expected CANCELLED, got {cuopt_remote_pb2.JobStatus.Name(status)}" + ) + else: + print(" ✓ Job successfully cancelled!") + + # Step 5: Wait a bit for worker to restart with new pipes + print("\n[5] Waiting for worker to restart (with new pipes)...") + time.sleep(2) + + # Step 6: Submit a new simple job to test pipe recreation + print("\n[6] Submitting new simple LP job to test pipe recreation...") + sock5 = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock5.connect((HOST, PORT)) + + simple_job_data = create_simple_lp() + job_id2 = submit_job(sock5, simple_job_data) + print(f" Job submitted: {job_id2}") + sock5.close() + + # Step 7: Wait for the new job to complete + print("\n[7] Waiting for new job to complete...") + sock6 = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) + sock6.connect((HOST, PORT)) + success, response = wait_for_result(sock6, job_id2) + sock6.close() + + if success: + print(" ✓ New job completed successfully!") + if response.result_response.HasField("lp_solution"): + sol = response.result_response.lp_solution + obj = sol.primal_objective + print(f" Objective value: {obj}") + if abs(obj - 1.0) < 0.01: + print(" ✓ Objective matches expected value (1.0)!") + else: + print(f" WARNING: Expected objective ~1.0, got {obj}") + else: + print(" ✗ New job failed!") + return 1 + + print("\n" + "=" * 60) + print("TEST PASSED: Cancellation and pipe recreation work correctly!") + print("=" * 60) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/dev_scripts/test_python_problem_remote.py b/dev_scripts/test_python_problem_remote.py new file mode 100644 index 000000000..63c766329 --- /dev/null +++ b/dev_scripts/test_python_problem_remote.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test Python Problem class with remote solve. + +This test uses the high-level Python Problem class (not raw protobuf) and +verifies it works with both local and remote solve. 
+ +Usage: + # Test local solve (requires GPU) + python test_python_problem_remote.py local + + # Test remote solve (requires cuopt_remote_server running) + CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=8765 python test_python_problem_remote.py remote +""" + +import os +import sys + + +def test_problem_class(): + """Test the Python Problem class API.""" + from cuopt import linear_programming + from cuopt.linear_programming.problem import Problem, MINIMIZE + + # Check if remote solve is configured + remote_host = os.environ.get("CUOPT_REMOTE_HOST", "") + remote_port = os.environ.get("CUOPT_REMOTE_PORT", "") + is_remote = bool(remote_host and remote_port) + + mode = "REMOTE" if is_remote else "LOCAL" + print(f"=== Testing Python Problem class ({mode} solve) ===") + if is_remote: + print(f" Server: {remote_host}:{remote_port}") + + # Create a simple LP: + # minimize: x + 2y + # subject to: + # x + y >= 1 + # x, y >= 0 + + problem = Problem("SimpleLP") + + # Add variables + x = problem.addVariable(name="x", lb=0.0) + y = problem.addVariable(name="y", lb=0.0) + + # Add constraint: x + y >= 1 + problem.addConstraint(x + y >= 1, name="c1") + + # Set objective: minimize x + 2y + problem.setObjective(x + 2 * y, sense=MINIMIZE) + + print(f" Variables: {problem.NumVariables}") + print(f" Constraints: {problem.NumConstraints}") + + # Solve + settings = linear_programming.SolverSettings() + settings.log_to_console = True + + print(" Solving...") + problem.solve(settings) + + # Check results + print(f" Status: {problem.Status}") + print(f" Objective value: {problem.ObjValue}") + print(f" Solve time: {problem.SolveTime:.4f}s") + print(f" x = {x.Value}") + print(f" y = {y.Value}") + + # Verify solution + # Optimal solution should be x=1, y=0 with objective=1 + # (since y has coefficient 2 and we minimize) + expected_obj = 1.0 + tolerance = 0.01 + + if abs(problem.ObjValue - expected_obj) < tolerance: + print( + f"\n=== SUCCESS: Objective {problem.ObjValue:.4f} matches expected 
{expected_obj} ===" + ) + return True + else: + print( + f"\n=== FAILED: Objective {problem.ObjValue:.4f} != expected {expected_obj} ===" + ) + return False + + +def test_mip_problem(): + """Test a MIP problem.""" + from cuopt import linear_programming + from cuopt.linear_programming.problem import Problem, INTEGER, MINIMIZE + + remote_host = os.environ.get("CUOPT_REMOTE_HOST", "") + remote_port = os.environ.get("CUOPT_REMOTE_PORT", "") + is_remote = bool(remote_host and remote_port) + mode = "REMOTE" if is_remote else "LOCAL" + + print(f"\n=== Testing Python MIP Problem class ({mode} solve) ===") + + # Simple MIP: + # minimize: x + y + # subject to: + # x + y >= 2.5 + # x, y >= 0, x integer + + problem = Problem("SimpleMIP") + + # Add variables + x = problem.addVariable(name="x", lb=0.0, vtype=INTEGER) + y = problem.addVariable(name="y", lb=0.0) + + # Add constraint + problem.addConstraint(x + y >= 2.5, name="c1") + + # Set objective + problem.setObjective(x + y, sense=MINIMIZE) + + print(f" Variables: {problem.NumVariables} (1 integer)") + print(f" Constraints: {problem.NumConstraints}") + + # Solve with MIP settings + settings = linear_programming.SolverSettings() + settings.log_to_console = True + settings.time_limit = 60.0 + + print(" Solving...") + problem.solve(settings) + + print(f" Status: {problem.Status}") + print(f" Objective value: {problem.ObjValue}") + print(f" x = {x.Value} (integer)") + print(f" y = {y.Value}") + + # Since x must be integer, optimal is x=1 or x=2 + y to make sum >= 2.5 + # x=1, y=1.5 gives obj=2.5 + # x=2, y=0.5 gives obj=2.5 + # x=3, y=0 gives obj=3 + # So optimal is 2.5 + expected_obj = 2.5 + tolerance = 0.1 + + if abs(problem.ObjValue - expected_obj) < tolerance: + print( + f"\n=== SUCCESS: MIP objective {problem.ObjValue:.4f} matches expected {expected_obj} ===" + ) + return True + else: + print( + f"\n=== FAILED: MIP objective {problem.ObjValue:.4f} != expected {expected_obj} ===" + ) + return False + + +if __name__ == 
"__main__": + # Show usage hint + if len(sys.argv) > 1 and sys.argv[1] in ("--help", "-h"): + print(__doc__) + sys.exit(0) + + # Run tests + try: + lp_ok = test_problem_class() + mip_ok = test_mip_problem() + + if lp_ok and mip_ok: + print("\n=== All tests PASSED ===") + sys.exit(0) + else: + print("\n=== Some tests FAILED ===") + sys.exit(1) + except Exception as e: + print(f"\n=== ERROR: {e} ===") + import traceback + + traceback.print_exc() + sys.exit(1) From 5574e743cfafcb3b03137a0b46c77918c8fd9958 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 11:13:10 -0500 Subject: [PATCH 27/37] Fix protobuf linking scope in CMakeLists.txt Move protobuf::libprotobuf from PUBLIC to PRIVATE linking for both cuopt and cuopt_remote_server libraries. This fixes Python wheel build failures where protobuf was incorrectly exposed as a public dependency. --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index dddc640ea..954270df7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -364,10 +364,10 @@ target_link_libraries(cuopt CCCL::CCCL raft::raft ${CUDSS_LIB_FILE} - protobuf::libprotobuf PRIVATE ${CUOPT_PRIVATE_CUDA_LIBS} cuopt::mps_parser_static # Static link - symbols embedded in libcuopt.so + protobuf::libprotobuf ) @@ -570,8 +570,8 @@ if(BUILD_REMOTE_SERVER AND NOT BUILD_LP_ONLY) PUBLIC cuopt OpenMP::OpenMP_CXX - protobuf::libprotobuf PRIVATE + protobuf::libprotobuf ) # Use RUNPATH so LD_LIBRARY_PATH can override conda paths during development set_target_properties(cuopt_remote_server PROPERTIES From 662e4642cab90ee33b8949f74ba08b45fa98fbec Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 12:36:25 -0500 Subject: [PATCH 28/37] Fix pipe IPC deadlock in worker_process Remove unnecessary data_sent flag wait loop that caused 30s timeout. 
The pipe read operation naturally blocks until data is available, so explicit polling of the data_sent flag is redundant and can cause the worker to timeout before the server writes data. This fixes the 'Timeout waiting for job data' error when using pipe-based IPC with large problems. Bug: Worker would timeout after 30 seconds waiting for data_sent flag Fix: Worker now directly reads from pipe (blocking read) Result: Data transfer happens as soon as server writes to pipe --- cpp/cuopt_remote_server.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 25c42cbee..cb07ed50c 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -616,24 +616,12 @@ void worker_process(int worker_id) bool read_success = false; if (config.use_pipes) { - // Pipe mode: wait for server to send data, then read from pipe - // Wait for data_sent flag with timeout - int wait_count = 0; - while (!job.data_sent && !job.cancelled && !shm_ctrl->shutdown_requested) { - usleep(1000); // 1ms - if (++wait_count > 30000) { // 30 second timeout - std::cerr << "[Worker " << worker_id << "] Timeout waiting for job data\n"; - break; - } - } - - if (job.data_sent && !job.cancelled) { - // Read from our input pipe - int read_fd = worker_pipes[worker_id].worker_read_fd; - read_success = recv_job_data_pipe(read_fd, job.data_size, request_data); - if (!read_success) { - std::cerr << "[Worker " << worker_id << "] Failed to read job data from pipe\n"; - } + // Pipe mode: read from pipe (blocks until server writes data) + // No need to wait for data_sent flag - pipe read naturally blocks + int read_fd = worker_pipes[worker_id].worker_read_fd; + read_success = recv_job_data_pipe(read_fd, job.data_size, request_data); + if (!read_success) { + std::cerr << "[Worker " << worker_id << "] Failed to read job data from pipe\n"; } } else { // SHM mode: read from shared memory From 
a4b86cf302a2caa397904869490f4bf7ccc38aab Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 14:25:18 -0500 Subject: [PATCH 29/37] Add comprehensive remote solve modes documentation Document all three operating modes (Sync, Async, WAIT_FOR_RESULT) with: - Detailed workflow descriptions and use cases - Log retrieval methods for each mode - Complete API endpoint reference - Python usage examples and best practices - Architecture diagrams for all modes - Future API design recommendations This consolidates the remote solve protocol documentation into a single reference for users and developers. --- REMOTE_SOLVE_MODES.md | 888 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 888 insertions(+) create mode 100644 REMOTE_SOLVE_MODES.md diff --git a/REMOTE_SOLVE_MODES.md b/REMOTE_SOLVE_MODES.md new file mode 100644 index 000000000..d33b17b1b --- /dev/null +++ b/REMOTE_SOLVE_MODES.md @@ -0,0 +1,888 @@ +# cuOpt Remote Solve Modes and Protocols + +This document describes the different operating modes, log retrieval methods, and APIs available for cuOpt remote solving. + +--- + +## Table of Contents + +1. [Operating Modes](#1-operating-modes) +2. [Supported Interfaces](#2-supported-interfaces) +3. [Log Retrieval Methods](#3-log-retrieval-methods) +4. [API Endpoints](#4-api-endpoints) +5. [WAIT_FOR_RESULT API](#5-wait_for_result-api) +6. [Detection and Configuration](#6-detection-and-configuration) +7. [Job Status States](#7-job-status-states) +8. [Python APIs](#8-python-apis) +9. [Workflow Comparisons](#9-workflow-comparisons) +10. [Best Practices](#10-best-practices) + +--- + +## 1. 
Operating Modes + +### 1.1 Sync Mode (`CUOPT_REMOTE_USE_SYNC=1`) + +**Behavior**: Client sends request and **blocks** until completion + +**Architecture**: +- Job still goes through server queue and worker process +- Server uses condition variable to block connection until job completes +- Returns result directly when complete + +**Log Streaming**: Real-time log streaming during solve + +**Use Case**: Interactive/development use where you want immediate feedback + +**Workflow**: +``` +Client → SUBMIT_JOB (blocking=true) → Server blocks → Worker solves → Server returns result +``` + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + cuopt_cli --log-to-console 1 problem.mps +``` + +--- + +### 1.2 Async Mode (default) + +**Behavior**: Client submits job, receives `job_id` immediately, non-blocking + +**Architecture**: +- Client polls for status (QUEUED → PROCESSING → COMPLETED/FAILED) +- Client retrieves logs incrementally using byte offset +- Client retrieves result when ready +- Client deletes job to free server memory + +**Log Retrieval**: Incremental polling via `GET_LOGS` API + +**Use Case**: Batch processing, long-running jobs, multiple concurrent jobs + +**Workflow**: +``` +Client → SUBMIT_JOB → job_id + ↓ (poll) + → CHECK_STATUS (returns QUEUED/PROCESSING/COMPLETED/FAILED) + → GET_LOGS (frombyte offset, returns new log lines) + → GET_RESULT (when COMPLETED) + → DELETE_RESULT (cleanup) +``` + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ + cuopt_cli problem.mps +``` + +--- + +### 1.3 Hybrid Mode (Async Submit + WAIT_FOR_RESULT) + +**Behavior**: Submit async to get `job_id`, then block on wait + +**Architecture**: +- Non-blocking submission returns `job_id` immediately +- `WAIT_FOR_RESULT` API blocks until completion +- Can stream logs in parallel thread while waiting + +**Log Retrieval**: Parallel thread polls `GET_LOGS` while main thread waits + +**Use Case**: 
Interactive use with job management (cancellation, log streaming) + +**Workflow**: +``` +Client → SUBMIT_JOB (blocking=false) → job_id + ↓ +[Thread 1] WAIT_FOR_RESULT(job_id) [BLOCKS until complete] +[Thread 2] while running: GET_LOGS(job_id, frombyte) + ↓ +result returned automatically +DELETE_RESULT(job_id) +``` + +**Example**: See [Section 5.3](#53-python-usage-example) for Python code + +--- + +## 2. Supported Interfaces + +| Interface | Sync Mode | Async Mode | WAIT_FOR_RESULT | +|-----------|-----------|------------|-----------------| +| **C++ API** (`solve_lp`, `solve_mip`) | ✓ | ✓ | ✓ (internal) | +| **Python API** (`Solve()`, `Problem.solve()`) | ✓ | ✓ | ❌ (not wrapped) | +| **cuopt_cli** | ✓ | ✓ | ❌ (not exposed) | +| **C API** | ✓ | ✓ | ❌ (not exposed) | +| **Python `cancel_job()`** | - | ✓ | ✓ | +| **Low-level Protobuf** | ✓ | ✓ | ✓ | + +**Note**: All high-level interfaces are **transparent** - they automatically detect remote solve via environment variables and handle the full async polling loop internally. + +--- + +## 3. 
Log Retrieval Methods + +### 3.1 Sync Mode Logging + +**Method**: Real-time streaming + +**How it works**: +- Server captures stdout from worker process +- Streams log data to client in real-time over TCP connection +- Logs printed to console as solver runs + +**Control**: Set `log_to_console=1` in solver settings + +**Example**: +```bash +CUOPT_REMOTE_HOST=localhost \ +CUOPT_REMOTE_PORT=8765 \ +CUOPT_REMOTE_USE_SYNC=1 \ + cuopt_cli --log-to-console 1 problem.mps +``` + +**Output**: Logs appear immediately as solver runs + +--- + +### 3.2 Async Mode Logging + +**Method**: Incremental polling via `GET_LOGS` API + +**How it works**: +- Logs written to `/tmp/cuopt_logs/log_{job_id}` on server +- Client calls `GET_LOGS` with byte offset (`frombyte`) +- Server returns new log content from offset to current position +- Client updates offset for next poll + +**Implementation** (C++ client): +```cpp +int64_t log_frombyte = 0; +while (true) { + auto [job_exists, new_frombyte] = get_logs(host, port, job_id, log_frombyte); + if (job_exists) { + log_frombyte = new_frombyte; + } + // Check status... + sleep(0.1); +} +``` + +**Python example** (low-level): +```python +req = pb.AsyncRequest() +req.request_type = pb.GET_LOGS +req.job_id = job_id +req.frombyte = frombyte + +response = send_recv(req) +logs = response.logs_response +for line in logs.log_lines: + print(line) +frombyte = logs.nbytes # Update for next poll +``` + +--- + +### 3.3 WAIT_FOR_RESULT Logging + +**Method**: Parallel thread polling `GET_LOGS` while main thread waits + +**How it works**: +- Main thread blocks on `WAIT_FOR_RESULT` +- Separate thread continuously polls `GET_LOGS` +- Both threads access same job using `job_id` +- Log thread stops when main thread returns result + +**Advantages**: +- Real-time log visibility +- No need to manually poll status +- Clean blocking semantics with parallel logging + +See [Section 5.3](#53-python-usage-example) for complete code example. + +--- + +## 4. 
API Endpoints + +### 4.1 Protobuf Protocol + +All communication uses length-prefixed Protocol Buffer messages: + +``` +[8-byte size (uint64_t)][serialized protobuf data] +``` + +### 4.2 Request Types + +| Endpoint | Sync | Async | Hybrid | Returns | +|----------|------|-------|--------|---------| +| `SUBMIT_JOB` (blocking=false) | - | ✓ | ✓ | `job_id` | +| `SUBMIT_JOB` (blocking=true) | ✓ | - | - | Full result (blocks) | +| `CHECK_STATUS` | - | ✓ | ✓ | `JobStatus` enum | +| `GET_LOGS` | - | ✓ | ✓ | Log content + new offset | +| `GET_RESULT` | - | ✓ | ✓ | Serialized solution | +| `DELETE_RESULT` | - | ✓ | ✓ | Success status | +| `CANCEL_JOB` | - | ✓ | ✓ | Cancel status | +| `WAIT_FOR_RESULT` | - | - | ✓ | Serialized solution (blocks) | + +### 4.3 Protobuf Enum Definition + +File: `cpp/src/linear_programming/utilities/cuopt_remote.proto` + +```protobuf +enum AsyncRequestType { + SUBMIT_JOB = 0; // Submit a new job + CHECK_STATUS = 1; // Check job status + GET_RESULT = 2; // Retrieve completed result + DELETE_RESULT = 3; // Delete result from server + GET_LOGS = 4; // Retrieve buffered log entries + CANCEL_JOB = 5; // Cancel a queued or running job + WAIT_FOR_RESULT = 6; // Block until job completes, returns result +} +``` + +--- + +## 5. 
WAIT_FOR_RESULT API + +### 5.1 Overview + +`WAIT_FOR_RESULT` is a **hybrid async/blocking mode** that combines the benefits of both sync and async modes: + +- **Submit async** → Get `job_id` back immediately (non-blocking submission) +- **Wait on result** → Block until job completes (no polling loop needed) +- **Stream logs in parallel** → Another thread can poll `GET_LOGS` while waiting + +**This is the best of both worlds for interactive use!** + +### 5.2 Server Implementation + +File: `cpp/cuopt_remote_server.cpp` + +#### Core Function (lines 1539-1602) + +```cpp +bool wait_for_result(const std::string& job_id, + std::vector<uint8_t>& result_data, + std::string& error_message) +{ + // First check if job already completed + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + // If already in terminal state, return immediately + if (it->second.status == JobStatus::COMPLETED) { + result_data = it->second.result_data; + return true; + } else if (it->second.status == JobStatus::FAILED) { + error_message = it->second.error_message; + return false; + } else if (it->second.status == JobStatus::CANCELLED) { + error_message = "Job was cancelled"; + return false; + } + } + + // Job is still running - create a waiter and wait on condition variable + auto waiter = std::make_shared<JobWaiter>(); + { + std::lock_guard lock(waiters_mutex); + waiting_threads[job_id] = waiter; + } + + // Wait on the condition variable - this thread will sleep until signaled + { + std::unique_lock lock(waiter->mutex); + waiter->cv.wait(lock, [&waiter] { return waiter->ready; }); + } + + // Wakes up when result_retrieval_thread signals the CV + if (waiter->success) { + result_data = std::move(waiter->result_data); + return true; + } else { + error_message = waiter->error_message; + return false; + } +} +``` + +#### Synchronization Mechanism + +**JobWaiter struct**: +```cpp +struct JobWaiter { + std::mutex mutex; + std::condition_variable cv; + bool ready = false; + bool success = false; 
+ std::vector result_data; + std::string error_message; +}; +``` + +**Signaling in result_retrieval_thread** (line 1186-1198): +```cpp +// Check if there's a blocking waiter +{ + std::lock_guard lock(waiters_mutex); + auto wit = waiting_threads.find(job_id); + if (wit != waiting_threads.end()) { + // Wake up the waiting thread + auto waiter = wit->second; + waiter->result_data = std::move(result_data); + waiter->error_message = error_message; + waiter->success = success; + waiter->ready = true; + waiter->cv.notify_one(); // <-- WAKE UP! + } +} +``` + +### 5.3 Python Usage Example + +File: `test_wait_with_logs.py` + +```python +import threading +import time +import cuopt_remote_pb2 as pb + +# 1. Submit job (async) to get job_id +req = pb.AsyncRequest() +req.request_type = pb.SUBMIT_JOB +req.blocking = False # Get job_id back immediately + +# Set up problem... +lp = req.lp_request +lp.problem.c.extend([1.0] * n_vars) +# ... more problem setup ... + +response = send_recv(req) +job_id = response.submit_response.job_id +print(f"Job ID: {job_id}") + +# 2. Start log streaming thread +def log_streaming_thread(job_id, stop_event): + frombyte = 0 + while not stop_event.is_set(): + # Poll GET_LOGS + req = pb.AsyncRequest() + req.request_type = pb.GET_LOGS + req.job_id = job_id + req.frombyte = frombyte + + response = send_recv(req) + logs_resp = response.logs_response + + if logs_resp.log_lines: + for line in logs_resp.log_lines: + print(f"[LOG] {line}") + frombyte = logs_resp.nbytes + + time.sleep(0.05) # Small delay to avoid hammering server + +stop_event = threading.Event() +log_thread = threading.Thread(target=log_streaming_thread, args=(job_id, stop_event)) +log_thread.start() + +# 3. Main thread: Wait for result (BLOCKS) +print("Calling WAIT_FOR_RESULT (blocking)...") +req = pb.AsyncRequest() +req.request_type = pb.WAIT_FOR_RESULT +req.job_id = job_id + +start = time.time() +response = send_recv(req, timeout=120) +elapsed = time.time() - start + +# 4. 
Stop log thread +stop_event.set() +log_thread.join() + +# 5. Use result +result = response.result_response +if result.HasField('lp_solution'): + sol = result.lp_solution + print(f"Completed in {elapsed:.2f}s") + print(f"Objective: {sol.primal_objective:.6f}") +else: + print(f"Error: {result.error_message}") + +# 6. Cleanup +del_req = pb.AsyncRequest() +del_req.request_type = pb.DELETE_RESULT +del_req.job_id = job_id +send_recv(del_req) +``` + +### 5.4 Key Benefits + +| Feature | Sync Mode | Async Polling | **WAIT_FOR_RESULT** | +|---------|-----------|---------------|---------------------| +| Non-blocking submit | ❌ | ✓ | ✓ | +| Get job_id back | ❌ | ✓ | ✓ | +| Parallel log streaming | ❌ | ✓ | ✓ | +| No polling loop needed | ✓ | ❌ | ✓ | +| Connection efficiency | Poor | Good | Good | +| Can cancel job | ✓ | ✓ | ✓ | + +**WAIT_FOR_RESULT combines all the best features!** + +### 5.5 Important Notes + +1. **No auto-delete**: `WAIT_FOR_RESULT` does NOT automatically delete the job after returning the result. This allows you to: + - Retrieve remaining logs with `GET_LOGS` after completion + - Call `DELETE_RESULT` when you're done with logs + +2. **Connection held open**: The TCP connection remains open while waiting (can be many seconds/minutes for large problems) + +3. **Same result format**: The response is identical to `GET_RESULT` (serialized solution) + +4. **Thread-safe**: Multiple clients can wait on different jobs simultaneously + +5. **Not yet wrapped**: `WAIT_FOR_RESULT` is currently only available via low-level Protobuf protocol. High-level Python/C++ wrappers would need to be added (similar to existing `cancel_job()` wrapper). + +--- + +## 6. 
Detection and Configuration
+
+### 6.1 Transparent Remote Solve Detection
+
+The same code works for both local and remote:
+
+```python
+# Works locally if no env vars set, remotely if set
+solution = solve_lp(data_model, settings)
+```
+
+**Detection logic** (in all interfaces):
+```cpp
+bool is_remote = (getenv("CUOPT_REMOTE_HOST") && getenv("CUOPT_REMOTE_PORT"));
+bool sync_mode = (getenv("CUOPT_REMOTE_USE_SYNC") && !strcmp(getenv("CUOPT_REMOTE_USE_SYNC"), "1"));
+```
+
+### 6.2 Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `CUOPT_REMOTE_HOST` | Server hostname/IP | (none - local solve) |
+| `CUOPT_REMOTE_PORT` | Server port | (none - local solve) |
+| `CUOPT_REMOTE_USE_SYNC` | Use sync mode if "1" | "0" (async) |
+| `CUOPT_SERIALIZER_LIB` | Path to custom serializer | (uses protobuf) |
+
+**Example**:
+```bash
+export CUOPT_REMOTE_HOST=gpu-server.example.com
+export CUOPT_REMOTE_PORT=8765
+export CUOPT_REMOTE_USE_SYNC=1 # Optional: force sync mode
+```
+
+---
+
+## 7. Job Status States
+
+```
+QUEUED (0)      → Job waiting for available worker
+PROCESSING (1)  → Worker is solving the problem
+COMPLETED (2)   → Solve finished successfully
+FAILED (3)      → Solve failed with error
+NOT_FOUND (4)   → Job ID doesn't exist on server
+CANCELLED (5)   → Job was cancelled by user
+```
+
+**State Transitions**:
+```
+QUEUED → PROCESSING → COMPLETED
+                    → FAILED
+                    → CANCELLED (via CANCEL_JOB)
+```
+
+---
+
+## 8. 
Python APIs + +### 8.1 Cancel Job (Available Now) + +File: `python/cuopt/cuopt/linear_programming/remote.py` + +```python +from cuopt.linear_programming import cancel_job, JobStatus + +result = cancel_job("job_abc123", host="localhost", port=8765) +print(f"Success: {result.success}") +print(f"Status: {result.job_status}") # JobStatus enum +print(f"Message: {result.message}") +``` + +**Returns**: `CancelResult` dataclass +- `success: bool` - Whether cancellation succeeded +- `message: str` - Status message +- `job_status: JobStatus` - Final job status + +### 8.2 Future API (Not Yet Implemented) + +To match the `cancel_job()` pattern, these could be added: + +```python +# Submit async and get job_id +def submit_job_async(data_model, settings, host=None, port=None) -> str: + """Submit job and return job_id immediately.""" + pass + +# Block until complete +def wait_for_result(job_id: str, host=None, port=None) -> Solution: + """Wait for job completion and return result (blocks).""" + pass + +# Poll for status +def get_job_status(job_id: str, host=None, port=None) -> JobStatus: + """Check job status without blocking.""" + pass + +# Get logs +def get_job_logs(job_id: str, frombyte: int = 0, host=None, port=None) -> tuple[list[str], int]: + """Get logs from byte offset, returns (log_lines, new_offset).""" + pass + +# Cleanup +def delete_job(job_id: str, host=None, port=None) -> bool: + """Delete job from server.""" + pass +``` + +--- + +## 9. 
Workflow Comparisons + +### 9.1 Sync Mode Workflow + +``` +Client: + connect() + send(SUBMIT_JOB, blocking=true, problem_data) + [BLOCKS - connection stays open] + receive(solution) + [Logs streamed in real-time during blocking] + disconnect() +``` + +**Pros**: +- Simple - one request/response +- Real-time logs +- Blocking semantics + +**Cons**: +- Connection held open entire time +- Can't cancel easily +- No job_id for tracking + +--- + +### 9.2 Async Mode Workflow (Traditional Polling) + +``` +Client: + # Submit + job_id = submit_job(problem) + + # Poll until complete + while True: + status = check_status(job_id) + if status == COMPLETED: + break + if status == FAILED: + error() + + # Optionally get logs + if verbose: + logs = get_logs(job_id, frombyte) + print(logs) + frombyte = update_offset(logs) + + sleep(0.1) + + # Retrieve result + solution = get_result(job_id) + + # Cleanup + delete_job(job_id) +``` + +**Pros**: +- Non-blocking +- Can cancel anytime +- Job_id for tracking +- Multiple clients can monitor same job + +**Cons**: +- Polling loop complexity +- Delayed log visibility (polling interval) +- More network requests + +--- + +### 9.3 WAIT_FOR_RESULT Workflow (Best of Both) + +``` +Client: + # Submit async + job_id = submit_job(problem, blocking=false) + + # Start log thread + Thread 1: + while not done: + logs = get_logs(job_id, frombyte) + print(logs) + frombyte = update_offset(logs) + sleep(0.05) + + # Main thread: wait for result + Thread 2 (main): + solution = wait_for_result(job_id) # BLOCKS + + # Stop log thread + stop_log_thread() + + # Cleanup + delete_job(job_id) +``` + +**Pros**: +- Non-blocking submit (get job_id) +- Real-time logs via parallel thread +- Blocking wait (no polling loop) +- Can cancel from another client +- Clean separation of concerns + +**Cons**: +- Requires threading +- Slightly more complex than sync mode + +--- + +## 10. 
Best Practices + +### 10.1 Choosing a Mode + +**Use Sync Mode when**: +- Interactive development/debugging +- Single-shot solves +- Simplicity is priority +- Don't need job_id + +**Use Async Mode when**: +- Batch processing +- Long-running jobs +- Need to manage multiple jobs +- Want to cancel jobs +- Production systems + +**Use WAIT_FOR_RESULT when**: +- Interactive use +- Want real-time logs +- Need job_id for tracking/cancellation +- Don't want polling complexity + +### 10.2 Log Retrieval Best Practices + +**For Sync Mode**: +```bash +# Always enable log_to_console for sync mode +CUOPT_REMOTE_USE_SYNC=1 cuopt_cli --log-to-console 1 problem.mps +``` + +**For Async Mode**: +```python +# Poll logs frequently for near-real-time visibility +frombyte = 0 +while job_running: + logs = get_logs(job_id, frombyte) + frombyte = logs.nbytes + time.sleep(0.05) # 50ms polling interval +``` + +**For WAIT_FOR_RESULT**: +```python +# Use dedicated thread for logs +log_thread = threading.Thread(target=log_poller, args=(job_id,)) +log_thread.daemon = True # Exit when main thread exits +log_thread.start() + +result = wait_for_result(job_id) +``` + +### 10.3 Error Handling + +**Always check job status**: +```python +result = wait_for_result(job_id) +if result.status == pb.FAILED: + print(f"Error: {result.error_message}") + # Get final logs for debugging + logs = get_logs(job_id, 0) + print(logs) +``` + +**Handle worker crashes**: +```python +# Server automatically marks jobs as FAILED if worker dies +# Check status periodically +status = check_status(job_id) +if status == FAILED: + # Retrieve error message + result = get_result(job_id) # Contains error_message +``` + +### 10.4 Resource Cleanup + +**Always delete completed jobs**: +```python +try: + result = wait_for_result(job_id) + # ... use result ... 
+finally: + delete_job(job_id) # Free server memory +``` + +**For long-running servers**: +- Implement periodic cleanup of old completed jobs +- Monitor `/tmp/cuopt_logs` directory size +- Consider auto-deletion after N hours + +--- + +## Appendix: Architecture Diagrams + +### A.1 Sync Mode Flow + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=1) │ │ + │──────────────────────────▶│ │ + │ │ Add to job queue │ + │ │──────────────────────────▶│ + │ │ │ + │ (connection held │ │ + │ open, blocking) │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ │ Logs streamed to client │ + │◀─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ +``` + +### A.2 Async Mode Flow + +``` +Client Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=0) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (QUEUED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Execute GPU Solve │ + │ │◀─────────────────────────▶│ + │ │ │ + │ GET_LOGS (job_id, 0) │ │ + │──────────────────────────▶│ │ + │ Response (logs 0-1000) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (PROCESSING) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ CHECK_STATUS (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (COMPLETED) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ DELETE_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ Response (SUCCESS) │ │ + │◀──────────────────────────│ │ +``` + +### A.3 WAIT_FOR_RESULT Flow + +``` +Client 
Server (Main) Worker Process + │ │ │ + │ SUBMIT_JOB (blocking=0) │ │ + │──────────────────────────▶│ │ + │ │ │ + │ Response (job_id) │ │ + │◀──────────────────────────│ │ + │ │ │ + ├─[Thread 1]────────────────┤ │ + │ WAIT_FOR_RESULT (job_id) │ │ + │──────────────────────────▶│ │ + │ │ Handler creates JobWaiter│ + │ (connection held │ Thread blocks on CV │ + │ open, no response) │ │ + │ │ │ + ├─[Thread 2]────────────────┤ │ + │ GET_LOGS (job_id, 0) │ │ + │──────────────────────────▶│ │ + │ Response (logs) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ GET_LOGS (job_id, 1000) │ Execute GPU Solve │ + │──────────────────────────▶│◀─────────────────────────▶│ + │ Response (logs) │ │ + │◀──────────────────────────│ │ + │ │ │ + │ │ Write Result │ + │ │◀──────────────────────────│ + │ │ │ + │ │ Result thread signals CV │ + │ │ Handler wakes up │ + │ │ │ + │ Response (solution) │ │ + │◀──────────────────────────│ │ + └─[Thread 1 returns] │ │ + │ │ │ + └─[Thread 2 stops polling] │ │ +``` + +--- + +## References + +- **Implementation**: `cpp/cuopt_remote_server.cpp` +- **Client Logic**: `cpp/src/linear_programming/utilities/remote_solve.cu` +- **Protocol Definition**: `cpp/src/linear_programming/utilities/cuopt_remote.proto` +- **Python Wrappers**: `python/cuopt/cuopt/linear_programming/remote.py` +- **Architecture Doc**: `docs/remote_solve_architecture.md` +- **Developer Guide**: `docs/developer/REMOTE_SOLVE_GUIDE.md` +- **Test Example**: `test_wait_with_logs.py` From c4e0395ee9d4721e8a277d0eb26dbc474b09388a Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 15:09:29 -0500 Subject: [PATCH 30/37] Fix deadlock in pipe mode for large results and add timeout safety - Fix send_typed_message to write 8-byte payload size (was incorrectly writing 4 bytes) - Fix pipe mode deadlock: set result.ready=true BEFORE writing to pipe (prevents deadlock when result size exceeds pipe buffer ~64KB) - Add 2-minute poll() timeout in read_from_pipe() for safety if worker dies - 
Add #include for poll() function --- cpp/cuopt_remote_server.cpp | 71 ++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index cb07ed50c..764e684ac 100644 --- a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -111,7 +112,7 @@ static bool send_typed_message(int sockfd, MessageType type, const void* data, s uint64_t payload_size = static_cast(size); if (::write(sockfd, &msg_type, 1) != 1) return false; - if (::write(sockfd, &payload_size, 4) != 4) return false; + if (::write(sockfd, &payload_size, sizeof(payload_size)) != sizeof(payload_size)) return false; if (size > 0) { const uint8_t* ptr = static_cast(data); size_t remaining = size; @@ -707,22 +708,20 @@ void worker_process(int worker_id) close(saved_stdout); close(saved_stderr); + // Store result (pipe mode: write to pipe, shm mode: write to shared memory) if (config.use_pipes) { - // Pipe mode: write result to output pipe - if (success && !result_data.empty()) { - int write_fd = worker_pipes[worker_id].worker_write_fd; - bool write_success = send_result_pipe(write_fd, result_data); - if (!write_success) { - std::cerr << "[Worker " << worker_id << "] Failed to write result to pipe\n"; - success = false; - error_message = "Failed to write result to pipe"; - } - } - - // Store result metadata in result queue + // PIPE MODE: Set result_queue metadata FIRST, THEN write to pipe. + // This avoids deadlock: the main thread's result_retrieval_thread + // needs to see ready=true before it will read from the pipe, + // but if we write to pipe first with a large result, we'll block + // waiting for the reader that will never come. 
+ + // Find a free result slot and populate metadata + int result_slot = -1; for (size_t i = 0; i < MAX_RESULTS; ++i) { if (!result_queue[i].ready) { + result_slot = i; ResultQueueEntry& result = result_queue[i]; strncpy(result.job_id, job_id.c_str(), sizeof(result.job_id) - 1); result.status = success ? 0 : 1; @@ -733,10 +732,26 @@ void worker_process(int worker_id) strncpy(result.error_message, error_message.c_str(), sizeof(result.error_message) - 1); } result.retrieved = false; - result.ready = true; // Mark as ready last + // Set ready=true BEFORE writing to pipe so reader thread starts reading + // This prevents deadlock with large results that exceed pipe buffer size + result.ready = true; break; } } + + // Now write result data to pipe (reader thread should be ready to receive) + if (success && !result_data.empty() && result_slot >= 0) { + int write_fd = worker_pipes[worker_id].worker_write_fd; + bool write_success = send_result_pipe(write_fd, result_data); + if (!write_success) { + std::cerr << "[Worker " << worker_id << "] Failed to write result to pipe\n"; + // Mark as failed in result queue + result_queue[result_slot].status = 1; + strncpy(result_queue[result_slot].error_message, + "Failed to write result to pipe", + sizeof(result_queue[result_slot].error_message) - 1); + } + } } else { // SHM mode: store result in shared memory for (size_t i = 0; i < MAX_RESULTS; ++i) { @@ -1318,15 +1333,39 @@ static bool write_to_pipe(int fd, const void* data, size_t size) return true; } -// Read all data from a pipe (handles partial reads) -static bool read_from_pipe(int fd, void* data, size_t size) +// Read all data from a pipe (handles partial reads) with timeout +// timeout_ms: milliseconds to wait for data (default 120000 = 2 minutes) +static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms = 120000) { uint8_t* ptr = static_cast(data); size_t remaining = size; while (remaining > 0) { + // Use poll() to wait for data with timeout + struct pollfd 
pfd; + pfd.fd = fd; + pfd.events = POLLIN; + + int poll_result = poll(&pfd, 1, timeout_ms); + if (poll_result < 0) { + if (errno == EINTR) continue; + std::cerr << "[Server] poll() failed on pipe: " << strerror(errno) << "\n"; + return false; + } + if (poll_result == 0) { + std::cerr << "[Server] Timeout waiting for pipe data (waited " << timeout_ms << "ms)\n"; + return false; + } + if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL)) { + std::cerr << "[Server] Pipe error/hangup detected\n"; + return false; + } + ssize_t nread = ::read(fd, ptr, remaining); if (nread <= 0) { if (errno == EINTR) continue; + if (nread == 0) { + std::cerr << "[Server] Pipe EOF (writer closed)\n"; + } return false; } ptr += nread; From afd25ed35b68c59e8848ff44de02b30f1ab890b3 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 15:34:47 -0500 Subject: [PATCH 31/37] Add Python and C API test scripts to dev_scripts - test_python_api.py: Python API test for local/remote solve - test_c_api.c: C API test for local/remote solve - build_c_test.sh: Build script for C test - Fix read_from_pipe forward declaration to match definition (rebase conflict fix) - Add dev_scripts/test_c_api to .gitignore --- .gitignore | 1 + cpp/cuopt_remote_server.cpp | 4 +- dev_scripts/build_c_test.sh | 35 +++++++ dev_scripts/test_c_api.c | 179 +++++++++++++++++++++++++++++++++ dev_scripts/test_python_api.py | 74 ++++++++++++++ 5 files changed, 291 insertions(+), 2 deletions(-) create mode 100755 dev_scripts/build_c_test.sh create mode 100644 dev_scripts/test_c_api.c create mode 100755 dev_scripts/test_python_api.py diff --git a/.gitignore b/.gitignore index 9edc9823c..c9d5349f7 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ docs/cuopt/build cpp/include/cuopt/semantic_version.hpp !datasets/quadratic_programming !datasets/quadratic_programming/** +dev_scripts/test_c_api diff --git a/cpp/cuopt_remote_server.cpp b/cpp/cuopt_remote_server.cpp index 764e684ac..5bab0bbd4 100644 --- 
a/cpp/cuopt_remote_server.cpp +++ b/cpp/cuopt_remote_server.cpp @@ -485,7 +485,7 @@ void delete_log_file(const std::string& job_id); // Forward declarations for pipe I/O helpers // ============================================================================ static bool write_to_pipe(int fd, const void* data, size_t size); -static bool read_from_pipe(int fd, void* data, size_t size); +static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms = 120000); static bool send_job_data_pipe(int worker_idx, const std::vector& data); static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data); static bool send_result_pipe(int fd, const std::vector& data); @@ -1335,7 +1335,7 @@ static bool write_to_pipe(int fd, const void* data, size_t size) // Read all data from a pipe (handles partial reads) with timeout // timeout_ms: milliseconds to wait for data (default 120000 = 2 minutes) -static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms = 120000) +static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms) { uint8_t* ptr = static_cast(data); size_t remaining = size; diff --git a/dev_scripts/build_c_test.sh b/dev_scripts/build_c_test.sh new file mode 100755 index 000000000..a1558536b --- /dev/null +++ b/dev_scripts/build_c_test.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Build the C API test program +# +# Usage: +# ./build_c_test.sh +# +# Prerequisites: +# - Activate conda environment with cuopt installed +# - CONDA_PREFIX must be set + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +if [ -z "$CONDA_PREFIX" ]; then + echo "Error: CONDA_PREFIX not set. Activate a conda environment first." + exit 1 +fi + +echo "Building test_c_api..." 
+gcc -I "$CONDA_PREFIX/include" \ + -L "$CONDA_PREFIX/lib" \ + -Wl,-rpath,"$CONDA_PREFIX/lib" \ + -o "$SCRIPT_DIR/test_c_api" \ + "$SCRIPT_DIR/test_c_api.c" \ + -lcuopt + +echo "Built: $SCRIPT_DIR/test_c_api" +echo "" +echo "Usage:" +echo " # Local solve:" +echo " $SCRIPT_DIR/test_c_api /path/to/problem.mps" +echo "" +echo " # Remote solve:" +echo " CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 $SCRIPT_DIR/test_c_api /path/to/problem.mps" diff --git a/dev_scripts/test_c_api.c b/dev_scripts/test_c_api.c new file mode 100644 index 000000000..ae800b9cd --- /dev/null +++ b/dev_scripts/test_c_api.c @@ -0,0 +1,179 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * Test script for cuOpt C API - works with both local and remote solve. + * + * Usage: + * # Local solve (default): + * ./test_c_api /path/to/problem.mps + * + * # Remote solve (set environment variables first): + * CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 ./test_c_api /path/to/problem.mps + * + * Build: + * gcc -I $CONDA_PREFIX/include -L $CONDA_PREFIX/lib -Wl,-rpath,$CONDA_PREFIX/lib \ + * -o test_c_api test_c_api.c -lcuopt + * + * Example: + * ./test_c_api /home/tmckay/repos/HiGHS/check/instances/afiro.mps + */ + +#include +#include +#include + +const char* termination_status_to_string(cuopt_int_t termination_status) +{ + switch (termination_status) { + case CUOPT_TERIMINATION_STATUS_OPTIMAL: + return "Optimal"; + case CUOPT_TERIMINATION_STATUS_INFEASIBLE: + return "Infeasible"; + case CUOPT_TERIMINATION_STATUS_UNBOUNDED: + return "Unbounded"; + case CUOPT_TERIMINATION_STATUS_ITERATION_LIMIT: + return "Iteration limit"; + case CUOPT_TERIMINATION_STATUS_TIME_LIMIT: + return "Time limit"; + case CUOPT_TERIMINATION_STATUS_NUMERICAL_ERROR: + return "Numerical error"; + case CUOPT_TERIMINATION_STATUS_PRIMAL_FEASIBLE: + return "Primal feasible"; + case 
CUOPT_TERIMINATION_STATUS_FEASIBLE_FOUND: + return "Feasible found"; + default: + return "Unknown"; + } +} + +cuopt_int_t solve_mps_file(const char* filename) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status; + cuopt_float_t time; + cuopt_int_t termination_status; + cuopt_float_t objective_value; + cuopt_int_t num_variables; + cuopt_float_t* solution_values = NULL; + + // Check for remote solve configuration + const char* remote_host = getenv("CUOPT_REMOTE_HOST"); + const char* remote_port = getenv("CUOPT_REMOTE_PORT"); + if (remote_host && remote_port) { + printf("Remote solve enabled: %s:%s\n", remote_host, remote_port); + } else { + printf("Local solve (no CUOPT_REMOTE_HOST/PORT set)\n"); + } + + printf("Reading MPS file: %s\n", filename); + + // Create the problem from MPS file + status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem from MPS file: %d\n", status); + goto DONE; + } + + // Get problem size + status = cuOptGetNumVariables(problem, &num_variables); + if (status != CUOPT_SUCCESS) { + printf("Error getting number of variables: %d\n", status); + goto DONE; + } + + // Create solver settings + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto DONE; + } + + // Set solver parameters + status = cuOptSetFloatParameter(settings, CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, 1e-6); + if (status != CUOPT_SUCCESS) { + printf("Error setting optimality tolerance: %d\n", status); + goto DONE; + } + + // Solve the problem + printf("Solving...\n"); + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem: %d\n", status); + goto DONE; + } + + // Get solution information + status = cuOptGetSolveTime(solution, &time); + if (status != CUOPT_SUCCESS) { + printf("Error getting solve 
time: %d\n", status); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status: %d\n", status); + goto DONE; + } + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto DONE; + } + + // Print results + printf("\nResults:\n"); + printf("----------------------------------------\n"); + printf("Number of variables: %d\n", num_variables); + printf("Status: %s (%d)\n", termination_status_to_string(termination_status), termination_status); + printf("Objective: %e\n", objective_value); + printf("Solve time: %.3f seconds\n", time); + + // Get and print solution variables + solution_values = (cuopt_float_t*)malloc(num_variables * sizeof(cuopt_float_t)); + status = cuOptGetPrimalSolution(solution, solution_values); + if (status != CUOPT_SUCCESS) { + printf("Error getting solution values: %d\n", status); + goto DONE; + } + + printf("\nPrimal solution (first 10 of %d variables):\n", num_variables); + for (cuopt_int_t i = 0; i < (num_variables < 10 ? num_variables : 10); i++) { + printf(" x%d = %f\n", i + 1, solution_values[i]); + } + if (num_variables > 10) { + printf(" ... 
(%d more variables)\n", num_variables - 10); + } + + printf("\nDone!\n"); + +DONE: + free(solution_values); + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + + return status; +} + +int main(int argc, char* argv[]) { + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + printf("\nSet CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT for remote solve.\n"); + return 1; + } + + cuopt_int_t status = solve_mps_file(argv[1]); + + if (status == CUOPT_SUCCESS) { + return 0; + } else { + printf("\nSolver failed with status: %d\n", status); + return 1; + } +} diff --git a/dev_scripts/test_python_api.py b/dev_scripts/test_python_api.py new file mode 100755 index 000000000..b01fa506c --- /dev/null +++ b/dev_scripts/test_python_api.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Test script for cuOpt Python API - works with both local and remote solve. + +Usage: + # Local solve (default): + python test_python_api.py /path/to/problem.mps + + # Remote solve (set environment variables first): + CUOPT_REMOTE_HOST=localhost CUOPT_REMOTE_PORT=9090 python test_python_api.py /path/to/problem.mps + +Example: + python test_python_api.py /home/tmckay/repos/HiGHS/check/instances/afiro.mps +""" + +import os +import sys + +import cuopt_mps_parser +from cuopt.linear_programming import solver, solver_settings + + +def main(): + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + mps_file = sys.argv[1] + if not os.path.exists(mps_file): + print(f"Error: File not found: {mps_file}") + sys.exit(1) + + # Check if remote solve is configured + remote_host = os.environ.get("CUOPT_REMOTE_HOST") + remote_port = os.environ.get("CUOPT_REMOTE_PORT") + if remote_host and remote_port: + print(f"Remote solve enabled: {remote_host}:{remote_port}") + else: + print("Local solve (no CUOPT_REMOTE_HOST/PORT set)") + + print(f"Reading MPS file: {mps_file}") + + # Parse MPS file into DataModel + data_model = 
cuopt_mps_parser.ParseMps(mps_file) + + # Create solver settings + settings = solver_settings.SolverSettings() + settings.set_optimality_tolerance(1e-6) + + # Solve + print("Solving...") + solution = solver.Solve(data_model, settings) + + # Print results + print("\nResults:") + print("-" * 40) + print(f"Status: {solution.get_termination_reason()}") + print(f"Objective: {solution.get_primal_objective():.6e}") + print(f"Solve time: {solution.get_solve_time():.3f} seconds") + + # Print first few solution values + primal = solution.get_primal_solution() + if len(primal) > 0: + print(f"\nPrimal solution (first 10 of {len(primal)} variables):") + for i, val in enumerate(primal[:10]): + print(f" x{i+1} = {val:.6f}") + if len(primal) > 10: + print(f" ... ({len(primal) - 10} more variables)") + + print("\nDone!") + + +if __name__ == "__main__": + main() From 28ef558a2e4144914925f87d0ab1f6d2d4a99273 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 16:29:02 -0500 Subject: [PATCH 32/37] Add libprotobuf dependency to CI build - Add libprotobuf to conda recipe build and host requirements - Add libprotobuf to dependencies.yaml build_cpp section - Regenerate conda environment files with libprotobuf Fixes CI build failure where protobuf library was not found during CMake configuration for remote solve server implementation. 
--- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-130_arch-x86_64.yaml.orig | 82 +++++++++++++++++++ .../all_cuda-131_arch-aarch64.yaml | 1 + .../all_cuda-131_arch-x86_64.yaml | 1 + conda/recipes/libcuopt/recipe.yaml | 2 + dependencies.yaml | 1 + 7 files changed, 89 insertions(+) create mode 100644 conda/environments/all_cuda-130_arch-x86_64.yaml.orig diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 0594f8106..8a362516b 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 55b3d3dfb..248c74ce9 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml.orig b/conda/environments/all_cuda-130_arch-x86_64.yaml.orig new file mode 100644 index 000000000..68ac02cdd --- /dev/null +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml.orig @@ -0,0 +1,82 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai-nightly +- rapidsai +- conda-forge +dependencies: +- boost +- breathe +- bzip2 +- c-compiler +- ccache +- clang-tools=20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cpp-argparse +- cuda-nvcc +- cuda-nvtx-dev +- cuda-python>=13.0.1,<14.0a0 +- cuda-sanitizer-api +- cuda-version=13.0 +- cudf==26.2.*,>=0.0.0a0 +- cupy>=13.6.0 +- cxx-compiler +- cython>=3.0.3 +- doxygen=1.9.1 +- exhale +- fastapi +- gcc_linux-64=14.* +- gmock +- gtest +- ipython +- jsonref==1.1.0 +- libcudss-dev >=0.7 +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libraft-headers==26.2.*,>=0.0.0a0 +- librmm==26.2.*,>=0.0.0a0 +- make +- msgpack-numpy==0.4.8 +- msgpack-python==1.1.0 +- myst-nb +- myst-parser +- ninja +- notebook +- numba-cuda>=0.19.1,<0.20.0a0 +- numba>=0.60.0 +- numpy>=1.23.5,<3.0a0 +- numpydoc +- pandas>=2.0 +- pexpect +- pip +- pre-commit +- psutil>=6.0.0 +- pylibraft==26.2.*,>=0.0.0a0 +- pyrsistent +- pytest-cov +- pytest<8 +- python>=3.10,<3.14 +- rapids-build-backend>=0.4.0,<0.5.0.dev0 +- rapids-dask-dependency==26.2.*,>=0.0.0a0 +- rapids-logger==0.2.*,>=0.0.0a0 +- requests +- rmm==26.2.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 +- sphinx +- sphinx-copybutton +- sphinx-design +- sphinx-markdown-tables +- sphinx_rtd_theme +- sphinxcontrib-openapi +- sphinxcontrib-websupport +- sysroot_linux-64==2.28 +- tbb-devel +- uvicorn==0.34.* +- zlib +- pip: + - nvidia_sphinx_theme + - swagger-plugin-for-sphinx + - veroviz +name: all_cuda-130_arch-x86_64 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index da9337c83..9475b5109 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml 
b/conda/environments/all_cuda-131_arch-x86_64.yaml index c2e8d7dbc..bc99d55f9 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -32,6 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index 8e56c6934..f9e0761a7 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -62,6 +62,7 @@ cache: - tbb-devel - zlib - bzip2 + - libprotobuf host: - cpp-argparse - cuda-version =${{ cuda_version }} @@ -76,6 +77,7 @@ cache: - tbb-devel - zlib - bzip2 + - libprotobuf outputs: - package: diff --git a/dependencies.yaml b/dependencies.yaml index 09faffe52..cea47b096 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -302,6 +302,7 @@ dependencies: - tbb-devel - zlib - bzip2 + - libprotobuf test_cpp: common: - output_types: [conda] From 3b4e741f00b75c9f51661a33515c8287f28fb0cd Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Tue, 13 Jan 2026 16:56:18 -0500 Subject: [PATCH 33/37] Add protobuf to Python wheel build dependencies - Add depends_on_protobuf section to dependencies.yaml - Include protobuf in py_rapids_build_libcuopt requirements - Regenerate python/libcuopt/pyproject.toml with protobuf Fixes Python wheel CI build failure where CMake could not find protobuf during the libcuopt wheel build. The previous commit added protobuf to conda builds, this commit adds it to pip wheel builds. 
--- dependencies.yaml | 6 ++++++ python/libcuopt/pyproject.toml | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index cea47b096..6593e695c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -137,6 +137,7 @@ files: - depends_on_librmm - depends_on_rapids_logger - depends_on_mps_parser + - depends_on_protobuf py_run_libcuopt: output: pyproject pyproject_dir: python/libcuopt @@ -505,6 +506,11 @@ dependencies: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + depends_on_protobuf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - protobuf depends_on_libraft_headers: common: - output_types: conda diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 7a7d84b8f..1091d12a9 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 [build-system] @@ -84,5 +84,6 @@ requires = [ "cuopt-mps-parser==26.2.*,>=0.0.0a0", "librmm==26.2.*,>=0.0.0a0", "ninja", + "protobuf", "rapids-logger==0.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
From 0dcd54aaf97656bf01479d2839764f5a5aad3d49 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 14 Jan 2026 09:58:08 -0600 Subject: [PATCH 34/37] Add protobuf installation --- ci/build_wheel_libcuopt.sh | 3 +++ ci/utils/install_protobuf.sh | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100755 ci/utils/install_protobuf.sh diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 640562ed3..dec5d1c9e 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -17,6 +17,9 @@ fi # Install Boost and TBB bash ci/utils/install_boost_tbb.sh +# Install Protobuf +bash ci/utils/install_protobuf.sh + export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON" # For pull requests we are enabling assert mode. diff --git a/ci/utils/install_protobuf.sh b/ci/utils/install_protobuf.sh new file mode 100755 index 000000000..0c678ccbb --- /dev/null +++ b/ci/utils/install_protobuf.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Install Protobuf development libraries +if [ -f /etc/os-release ]; then + . /etc/os-release + if [[ "$ID" == "rocky" ]]; then + echo "Detected Rocky Linux. Installing Protobuf via dnf..." + dnf install -y protobuf-devel protobuf-compiler + elif [[ "$ID" == "ubuntu" ]]; then + echo "Detected Ubuntu. Installing Protobuf via apt..." + apt-get update + apt-get install -y libprotobuf-dev protobuf-compiler + else + echo "Unknown OS: $ID. Please install Protobuf development libraries manually." + exit 1 + fi +else + echo "/etc/os-release not found. Cannot determine OS. Please install Protobuf development libraries manually." 
+ exit 1 +fi From 1a2227b7beeb44cae4dfd732874b9cb2f25afe45 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 14 Jan 2026 12:04:30 -0600 Subject: [PATCH 35/37] update --- ci/utils/install_protobuf.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/utils/install_protobuf.sh b/ci/utils/install_protobuf.sh index 0c678ccbb..c17a3bb01 100755 --- a/ci/utils/install_protobuf.sh +++ b/ci/utils/install_protobuf.sh @@ -10,6 +10,12 @@ if [ -f /etc/os-release ]; then . /etc/os-release if [[ "$ID" == "rocky" ]]; then echo "Detected Rocky Linux. Installing Protobuf via dnf..." + # Enable PowerTools (Rocky 8) or CRB (Rocky 9) repository for protobuf-devel + if [[ "${VERSION_ID%%.*}" == "8" ]]; then + dnf config-manager --set-enabled powertools || dnf config-manager --set-enabled PowerTools || true + elif [[ "${VERSION_ID%%.*}" == "9" ]]; then + dnf config-manager --set-enabled crb || true + fi dnf install -y protobuf-devel protobuf-compiler elif [[ "$ID" == "ubuntu" ]]; then echo "Detected Ubuntu. Installing Protobuf via apt..." From 6fda94249ba7b4e30cd3d72b448c494144c0851d Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 14 Jan 2026 15:06:41 -0600 Subject: [PATCH 36/37] fix warning failure --- cpp/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 954270df7..be1e366cd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -139,6 +139,12 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") endif() list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror --default-stream=per-thread) +# GCC (especially newer versions) can emit false-positive -Warray-bounds diagnostics from +# 3rd-party headers (e.g., Protobuf) that get promoted to hard errors via -Werror above. 
+# Keep -Werror for our code but do not fail the build on this specific diagnostic. +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wno-error=array-bounds -Xcompiler=-Wno-array-bounds) +endif() if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall) else() @@ -227,7 +233,7 @@ find_package(CUDSS REQUIRED) # Protocol Buffers for remote solve serialization find_package(Protobuf REQUIRED) -include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(SYSTEM ${Protobuf_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) # Generate C++ code from .proto file From 235323c4e1338c0be3e9dc75e59eb879cce21cd7 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 14 Jan 2026 17:03:18 -0600 Subject: [PATCH 37/37] fix cmake --- cpp/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index be1e366cd..a06ed705d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -144,6 +144,8 @@ list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated- # Keep -Werror for our code but do not fail the build on this specific diagnostic. if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wno-error=array-bounds -Xcompiler=-Wno-array-bounds) + # Protobuf headers can also trigger GCC -Wstringop-overread false-positives under heavy inlining. + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wno-error=stringop-overread -Xcompiler=-Wno-stringop-overread) endif() if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall)