From fe5f33397c3ab64f8104ad19b73a49c8b4c94ade Mon Sep 17 00:00:00 2001 From: "Nicolas L. Guidotti" Date: Fri, 6 Feb 2026 15:58:06 +0100 Subject: [PATCH] Scheduler-Worker Model + Reliability Branching (#766) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR implements the schedule-worker model for B&B, where one thread (the scheduler) is responsible for scheduling tasks to the other threads (the workers). With this model, the CPU resources can be shared efficiently among the different tasks within the MIP solver. It also allows the scheduling policy to be changed in runtime (e.g., in the future, the solver can dynamically set the number of workers allocated to each diving heuristics at runtime). This also implements a parallel reliability branching (Section 5.7 from [1]). This also fixes the incorrect pseudocost update in each node in the B&B tree. Closes #526. Closes #445. Closes #700. Performance over the MIPLIB2017 dataset (GH200): ``` ================================================================================ main - 046eafd (1) vs reliability-branching-cuts (2) ================================================================================ Feasible solutions = 225 vs 224 (-1) Optimal solutions = 44 vs 68 (+24) Solutions with <0.1% primal gap = 109 vs 125 (+16) Average nodes explored = 7298973 vs 4300451 (-2998522, -41.081%) Shifted geomean for MIP gap = 0.2979 vs 0.2374 (-0.0605, -20.302%) Average primal gap = 11.9935 vs 11.0833 (-0.9102, -7.589%) Average primal integral per time = 19.3326 vs 30.7054 (+11.3728, +37.038%) ================================================================================ ``` ## Reference [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, Berlin, 2007. doi: [10.14279/depositonce-1634](https://doi.org/10.14279/depositonce-1634). Authors: - Nicolas L. Guidotti (https://github.com/nguidotti) - Chris Maes (https://github.com/chris-maes) Approvers: - Chris Maes (https://github.com/chris-maes) URL: https://github.com/NVIDIA/cuopt/pull/766 --- .../linear_programming/cuopt/run_mip.cpp | 25 +- .../cuopt/linear_programming/constants.h | 1 + .../mip/solver_settings.hpp | 1 + cpp/src/dual_simplex/basis_updates.hpp | 5 + cpp/src/dual_simplex/branch_and_bound.cpp | 844 +++++++----------- cpp/src/dual_simplex/branch_and_bound.hpp | 124 +-- .../dual_simplex/branch_and_bound_worker.hpp | 280 ++++++ cpp/src/dual_simplex/diving_heuristics.cpp | 15 +- cpp/src/dual_simplex/mip_node.hpp | 16 +- cpp/src/dual_simplex/node_queue.hpp | 11 +- cpp/src/dual_simplex/presolve.cpp | 2 +- cpp/src/dual_simplex/pseudo_costs.cpp | 353 ++++++-- cpp/src/dual_simplex/pseudo_costs.hpp | 74 +- .../dual_simplex/simplex_solver_settings.hpp | 30 +- cpp/src/dual_simplex/solution.hpp | 4 +- cpp/src/math_optimization/solver_settings.cu | 1 + cpp/src/mip/diversity/lns/rins.cu | 21 +- cpp/src/mip/diversity/recombiners/sub_mip.cuh | 19 +- cpp/src/mip/solver.cu | 19 +- cpp/src/utilities/omp_helpers.hpp | 42 +- cpp/src/utilities/pcgenerator.hpp | 159 ++++ 21 files changed, 1318 insertions(+), 728 deletions(-) create mode 100644 cpp/src/dual_simplex/branch_and_bound_worker.hpp create mode 100644 cpp/src/utilities/pcgenerator.hpp diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 213e38e5e..308c7087b 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -147,6 +147,7 @@ int run_single_file(std::string file_path, int num_cpu_threads, bool write_log_file, bool log_to_console, + int reliability_branching, double time_limit) { const raft::handle_t handle_{}; @@ -196,7 +197,6 @@ int run_single_file(std::string file_path, } } } - settings.time_limit = time_limit; settings.heuristics_only = heuristics_only; settings.num_cpu_threads = num_cpu_threads; @@ -204,6 +204,7 @@ int run_single_file(std::string file_path, settings.tolerances.relative_tolerance = 1e-12; settings.tolerances.absolute_tolerance = 1e-6; settings.presolve = true; + settings.reliability_branching = reliability_branching; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; auto start_run_solver = std::chrono::high_resolution_clock::now(); @@ -256,6 +257,7 @@ void run_single_file_mp(std::string file_path, int num_cpu_threads, bool write_log_file, bool log_to_console, + int reliability_branching, double time_limit) { std::cout << "running file " << file_path << " on gpu : " << device << std::endl; @@ -271,6 +273,7 @@ void run_single_file_mp(std::string file_path, num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); // this is a bad design to communicate the result but better than adding complexity of IPC or // pipes @@ -354,6 +357,11 @@ int main(int argc, char* argv[]) .help("track allocations (t/f)") .default_value(std::string("f")); + program.add_argument("--reliability-branching") + .help("reliability branching: -1 (automatic), 0 (disable) or k > 0 (use k)") + .scan<'i', int>() + .default_value(-1); + // Parse arguments try { program.parse_args(argc, argv); @@ -376,12 +384,13 @@ int main(int argc, char* argv[]) std::string result_file; int batch_num = -1; - bool heuristics_only = program.get("--heuristics-only")[0] == 't'; - int num_cpu_threads = program.get("--num-cpu-threads"); - bool write_log_file = program.get("--write-log-file")[0] == 't'; - bool log_to_console = program.get("--log-to-console")[0] == 't'; - double memory_limit = program.get("--memory-limit"); - bool track_allocations = program.get("--track-allocations")[0] == 't'; + bool heuristics_only = program.get("--heuristics-only")[0] == 't'; + int num_cpu_threads = program.get("--num-cpu-threads"); + bool write_log_file = program.get("--write-log-file")[0] == 't'; + bool log_to_console = program.get("--log-to-console")[0] == 't'; + double memory_limit = program.get("--memory-limit"); + bool track_allocations = program.get("--track-allocations")[0] == 't'; + int reliability_branching = program.get("--reliability-branching"); if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } @@ -469,6 +478,7 @@ int main(int argc, char* argv[]) num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); } else if (sys_pid < 0) { std::cerr << "Fork failed!" << std::endl; @@ -509,6 +519,7 @@ int main(int argc, char* argv[]) num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); } diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 7caf7aeeb..c52c22c85 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -57,6 +57,7 @@ #define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" #define CUOPT_MIP_SCALING "mip_scaling" #define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" #define CUOPT_MIP_CUT_PASSES "mip_cut_passes" #define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" #define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 863e5d66d..c5c26884f 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -86,6 +86,7 @@ class mip_solver_settings_t { f_t time_limit = std::numeric_limits::infinity(); i_t node_limit = std::numeric_limits::max(); bool heuristics_only = false; + i_t reliability_branching = -1; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound i_t max_cut_passes = 10; // number of cut passes to make i_t mir_cuts = -1; diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 8eca3ba8a..d9783053f 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -223,6 +223,9 @@ class basis_update_mpf_t { reset_stats(); } + basis_update_mpf_t(const basis_update_mpf_t& other) = default; + basis_update_mpf_t& operator=(const basis_update_mpf_t& other) = default; + void print_stats() const { i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; @@ -381,6 +384,8 @@ class basis_update_mpf_t { std::vector& nonbasic_list, std::vector& vstatus); + void set_refactor_frequency(i_t new_frequency) { refactor_frequency_ = new_frequency; } + private: void clear() { diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index d35415904..c6e8b0d17 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -201,26 +201,26 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } #ifdef SHOW_DIVING_TYPE -inline char feasible_solution_symbol(bnb_worker_type_t type) +inline char feasible_solution_symbol(search_strategy_t strategy) { - switch (type) { - case bnb_worker_type_t::BEST_FIRST: return 'B'; - case bnb_worker_type_t::COEFFICIENT_DIVING: return 'C'; - case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'L'; - case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'P'; - case bnb_worker_type_t::GUIDED_DIVING: return 'G'; + switch (strategy) { + case search_strategy_t::BEST_FIRST: return 'B'; + case search_strategy_t::COEFFICIENT_DIVING: return 'C'; + case search_strategy_t::LINE_SEARCH_DIVING: return 'L'; + case search_strategy_t::PSEUDOCOST_DIVING: return 'P'; + case search_strategy_t::GUIDED_DIVING: return 'G'; default: return 'U'; } } #else -inline char feasible_solution_symbol(bnb_worker_type_t type) +inline char feasible_solution_symbol(search_strategy_t strategy) { - switch (type) { - case bnb_worker_type_t::BEST_FIRST: return 'B'; - case bnb_worker_type_t::COEFFICIENT_DIVING: return 'D'; - case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'D'; - case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'D'; - case bnb_worker_type_t::GUIDED_DIVING: return 'D'; + switch (strategy) { + case search_strategy_t::BEST_FIRST: return 'B'; + case search_strategy_t::COEFFICIENT_DIVING: return 'D'; + case search_strategy_t::LINE_SEARCH_DIVING: return 'D'; + case search_strategy_t::PSEUDOCOST_DIVING: return 'D'; + case search_strategy_t::GUIDED_DIVING: return 'D'; default: return 'U'; } } @@ -273,7 +273,8 @@ branch_and_bound_t::branch_and_bound_t( } #endif - upper_bound_ = inf; + upper_bound_ = inf; + root_objective_ = std::numeric_limits::quiet_NaN(); } template @@ -282,18 +283,21 @@ f_t branch_and_bound_t::get_lower_bound() f_t lower_bound = lower_bound_ceiling_.load(); f_t heap_lower_bound = node_queue_.get_lower_bound(); lower_bound = std::min(heap_lower_bound, lower_bound); + lower_bound = std::min(worker_pool_.get_lower_bound(), lower_bound); - for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { - lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); + if (std::isfinite(lower_bound)) { + return lower_bound; + } else if (std::isfinite(root_objective_)) { + return root_objective_; + } else { + return -inf; } - - return std::isfinite(lower_bound) ? lower_bound : -inf; } template void branch_and_bound_t::report_heuristic(f_t obj) { - if (is_running) { + if (is_running_) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(user_obj, user_lower); @@ -671,7 +675,7 @@ template void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - bnb_worker_type_t thread_type) + search_strategy_t thread_type) { bool send_solution = false; @@ -720,91 +724,64 @@ template branch_variable_t branch_and_bound_t::variable_selection( mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - bnb_worker_type_t type) + branch_and_bound_worker_t* worker) { logger_t log; log.log = false; i_t branch_var = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; std::vector current_incumbent; + std::vector& solution = worker->leaf_solution.x; + + switch (worker->search_strategy) { + case search_strategy_t::BEST_FIRST: + + if (settings_.reliability_branching != 0) { + branch_var = pc_.reliable_variable_selection(node_ptr, + fractional, + solution, + settings_, + var_types_, + worker, + exploration_stats_, + upper_bound_, + worker_pool_.num_idle_workers(), + log); + } else { + branch_var = pc_.variable_selection(fractional, solution, log); + } - // If there is no incumbent, use pseudocost diving instead of guided diving - if (upper_bound_ == inf && type == bnb_worker_type_t::GUIDED_DIVING) { - type = bnb_worker_type_t::PSEUDOCOST_DIVING; - } + round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); - switch (type) { - case bnb_worker_type_t::BEST_FIRST: - branch_var = pc_.variable_selection(fractional, solution, log); - round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); return {branch_var, round_dir}; - case bnb_worker_type_t::COEFFICIENT_DIVING: + case search_strategy_t::COEFFICIENT_DIVING: return coefficient_diving( original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); - case bnb_worker_type_t::LINE_SEARCH_DIVING: + case search_strategy_t::LINE_SEARCH_DIVING: return line_search_diving(fractional, solution, root_relax_soln_.x, log); - case bnb_worker_type_t::PSEUDOCOST_DIVING: + case search_strategy_t::PSEUDOCOST_DIVING: return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); - case bnb_worker_type_t::GUIDED_DIVING: + case search_strategy_t::GUIDED_DIVING: mutex_upper_.lock(); current_incumbent = incumbent_.x; mutex_upper_.unlock(); return guided_diving(pc_, fractional, solution, current_incumbent, log); default: - log.debug("Unknown variable selection method: %d\n", type); + log.debug("Unknown variable selection method: %d\n", worker->search_strategy); return {-1, rounding_direction_t::NONE}; } } -template -void branch_and_bound_t::initialize_diving_heuristics_settings( - std::vector& diving_strategies) -{ - diving_strategies.reserve(4); - - if (settings_.diving_settings.pseudocost_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING); - } - - if (settings_.diving_settings.line_search_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING); - } - - if (settings_.diving_settings.guided_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING); - } - - if (settings_.diving_settings.coefficient_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING); - calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); - } - - if (diving_strategies.empty()) { - settings_.log.printf("Warning: All diving heuristics are disabled!\n"); - } -} - template dual::status_t branch_and_bound_t::solve_node_lp( mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - bnb_worker_type_t thread_type, - bool recompute_bounds_and_basis, - const std::vector& root_lower, - const std::vector& root_upper, - bnb_stats_t& stats, + branch_and_bound_worker_t* worker, + branch_and_bound_stats_t& stats, logger_t& log) { #ifdef DEBUG_BRANCHING @@ -843,7 +820,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( #endif std::vector& leaf_vstatus = node_ptr->vstatus; - assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(leaf_vstatus.size() == worker->leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); @@ -852,10 +829,10 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; - if (thread_type != bnb_worker_type_t::BEST_FIRST) { - i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + if (worker->search_strategy != search_strategy_t::BEST_FIRST) { + int64_t bnb_lp_iters = exploration_stats_.total_lp_iters; f_t factor = settings_.diving_settings.iteration_limit_factor; - i_t max_iter = factor * bnb_lp_iters; + int64_t max_iter = factor * bnb_lp_iters; lp_settings.iteration_limit = max_iter - stats.total_lp_iters; if (lp_settings.iteration_limit <= 0) { return dual::status_t::ITERATION_LIMIT; } } @@ -882,24 +859,9 @@ dual::status_t branch_and_bound_t::solve_node_lp( node_ptr->vstatus[node_ptr->branch_var]); #endif - // Reset the bound_changed markers - std::vector bounds_changed(original_lp_.num_cols, false); - - // Set the correct bounds for the leaf problem - if (recompute_bounds_and_basis) { - leaf_problem.lower = root_lower; - leaf_problem.upper = root_upper; - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - } else { - node_ptr->update_branched_variable_bounds( - leaf_problem.lower, leaf_problem.upper, bounds_changed); - } - - bool feasible = node_presolver.bounds_strengthening( - lp_settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); - + bool feasible = worker->set_lp_variable_bounds(node_ptr, settings_); dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + worker->leaf_edge_norms = edge_norms_; if (feasible) { i_t node_iter = 0; @@ -907,29 +869,29 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_status = dual_phase2_with_advanced_basis(2, 0, - recompute_bounds_and_basis, + worker->recompute_basis, lp_start_time, - leaf_problem, + worker->leaf_problem, lp_settings, leaf_vstatus, - basis_factors, - basic_list, - nonbasic_list, - leaf_solution, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + worker->leaf_solution, node_iter, - leaf_edge_norms); + worker->leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { log.debug("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); - lp_status_t second_status = solve_linear_program_with_advanced_basis(leaf_problem, + lp_status_t second_status = solve_linear_program_with_advanced_basis(worker->leaf_problem, lp_start_time, lp_settings, - leaf_solution, - basis_factors, - basic_list, - nonbasic_list, + worker->leaf_solution, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, leaf_vstatus, - leaf_edge_norms); + worker->leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } @@ -944,20 +906,18 @@ dual::status_t branch_and_bound_t::solve_node_lp( return lp_status; } - template std::pair branch_and_bound_t::update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - bnb_worker_type_t thread_type, + branch_and_bound_worker_t* worker, dual::status_t lp_status, logger_t& log) { const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; std::vector& leaf_vstatus = node_ptr->vstatus; + lp_problem_t& leaf_problem = worker->leaf_problem; + lp_solution_t& leaf_solution = worker->leaf_solution; if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch @@ -997,12 +957,12 @@ std::pair branch_and_bound_t::upd } #endif - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - node_ptr->lower_bound = leaf_objective; + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); pc_.update_pseudo_costs(node_ptr, leaf_objective); + node_ptr->lower_bound = leaf_objective; - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { if (settings_.node_processed_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); @@ -1012,15 +972,15 @@ std::pair branch_and_bound_t::upd if (leaf_num_fractional == 0) { // Found a integer feasible solution - add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); + add_feasible_solution( + leaf_objective, leaf_solution.x, node_ptr->depth, worker->search_strategy); search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); search_tree.update(node_ptr, node_status_t::INTEGER_FEASIBLE); return {node_status_t::INTEGER_FEASIBLE, rounding_direction_t::NONE}; } else if (leaf_objective <= upper_bound_ + abs_fathom_tol) { // Choose fractional variable to branch on - auto [branch_var, round_dir] = - variable_selection(node_ptr, leaf_fractional, leaf_solution.x, thread_type); + auto [branch_var, round_dir] = variable_selection(node_ptr, leaf_fractional, worker); assert(leaf_vstatus.size() == leaf_problem.num_cols); assert(branch_var >= 0); @@ -1029,7 +989,7 @@ std::pair branch_and_bound_t::upd // Note that the exploration thread is the only one that can insert new nodes into the heap, // and thus, we only need to calculate the objective estimate here (it is used for // sorting the nodes for diving). - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { logger_t pc_log; pc_log.log = false; node_ptr->objective_estimate = @@ -1052,7 +1012,7 @@ std::pair branch_and_bound_t::upd return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } } else { - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " @@ -1070,136 +1030,19 @@ std::pair branch_and_bound_t::upd } template -void branch_and_bound_t::exploration_ramp_up(mip_node_t* node, - i_t initial_heap_size) +void branch_and_bound_t::plunge_with(branch_and_bound_worker_t* worker) { - if (solver_status_ != mip_status_t::UNSET) { return; } - - // Note that we do not know which thread will execute the - // `exploration_ramp_up` task, so we allow to any thread - // to repair the heuristic solution. - repair_heuristic_solutions(); - - f_t lower_bound = node->lower_bound; - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = upper_bound - lower_bound; - - if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree_.graphviz_node(settings_.log, node, "cutoff", node->lower_bound); - search_tree_.update(node, node_status_t::FATHOMED); - --exploration_stats_.nodes_unexplored; - return; - } - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 10 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - (time_since_last_log >= 1)) || - (time_since_last_log > 30) || now > settings_.time_limit) { - bool should_report = should_report_.exchange(false); - - if (should_report) { - report(' ', upper_bound, root_objective_, node->depth, node->integer_infeasible); - exploration_stats_.nodes_since_last_log = 0; - exploration_stats_.last_log = tic(); - should_report_ = true; - } - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - return; - } - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; - - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - bnb_worker_type_t::BEST_FIRST, - true, - original_lp_.lower, - original_lp_.upper, - exploration_stats_, - settings_.log); - if (lp_status == dual::status_t::TIME_LIMIT) { - solver_status_ = mip_status_t::TIME_LIMIT; - return; - } - - ++exploration_stats_.nodes_since_last_log; - ++exploration_stats_.nodes_explored; - --exploration_stats_.nodes_unexplored; - - auto [node_status, round_dir] = update_tree(node, - search_tree_, - leaf_problem, - leaf_solution, - leaf_edge_norms, - bnb_worker_type_t::BEST_FIRST, - lp_status, - settings_.log); - - if (node_status == node_status_t::HAS_CHILDREN) { - exploration_stats_.nodes_unexplored += 2; - - // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase - if (exploration_stats_.nodes_unexplored < initial_heap_size) { -#pragma omp task - exploration_ramp_up(node->get_down_child(), initial_heap_size); - -#pragma omp task - exploration_ramp_up(node->get_up_child(), initial_heap_size); - - } else { - // We've generated enough nodes, push further nodes onto the heap - node_queue_.push(node->get_down_child()); - node_queue_.push(node->get_up_child()); - } - } -} - -template -void branch_and_bound_t::plunge_from(i_t task_id, - mip_node_t* start_node, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) -{ - bool recompute_bounds_and_basis = true; std::deque*> stack; - stack.push_front(start_node); - - while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { - if (task_id == 0) { repair_heuristic_solutions(); } + stack.push_front(worker->start_node); + worker->recompute_basis = true; + worker->recompute_bounds = true; + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); f_t lower_bound = node_ptr->lower_bound; f_t upper_bound = upper_bound_; - f_t abs_gap = upper_bound - lower_bound; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); // This is based on three assumptions: @@ -1208,58 +1051,28 @@ void branch_and_bound_t::plunge_from(i_t task_id, // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - assert(task_id < local_lower_bounds_.size()); - local_lower_bounds_[task_id] = lower_bound; + worker->lower_bound = lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree_.update(node_ptr, node_status_t::FATHOMED); - recompute_bounds_and_basis = true; + worker->recompute_basis = true; + worker->recompute_bounds = true; --exploration_stats_.nodes_unexplored; continue; } - f_t now = toc(exploration_stats_.start_time); - - if (task_id == 0) { - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 1000 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - report(' ', upper_bound, get_lower_bound(), node_ptr->depth, node_ptr->integer_infeasible); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - } - - if (now > settings_.time_limit) { + if (toc(exploration_stats_.start_time) > settings_.time_limit) { solver_status_ = mip_status_t::TIME_LIMIT; break; } + if (exploration_stats_.nodes_explored >= settings_.node_limit) { solver_status_ = mip_status_t::NODE_LIMIT; break; } - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node_ptr, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - bnb_worker_type_t::BEST_FIRST, - recompute_bounds_and_basis, - original_lp_.lower, - original_lp_.upper, - exploration_stats_, - settings_.log); + dual::status_t lp_status = solve_node_lp(node_ptr, worker, exploration_stats_, settings_.log); if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; @@ -1272,16 +1085,11 @@ void branch_and_bound_t::plunge_from(i_t task_id, ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - auto [node_status, round_dir] = update_tree(node_ptr, - search_tree_, - leaf_problem, - leaf_solution, - leaf_edge_norms, - bnb_worker_type_t::BEST_FIRST, - lp_status, - settings_.log); + auto [node_status, round_dir] = + update_tree(node_ptr, search_tree_, worker, lp_status, settings_.log); - recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { // The stack should only contain the children of the current parent. @@ -1297,143 +1105,73 @@ void branch_and_bound_t::plunge_from(i_t task_id, exploration_stats_.nodes_unexplored += 2; if (round_dir == rounding_direction_t::UP) { - stack.push_front(node_ptr->get_down_child()); + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_down_child()); + } else { + stack.push_front(node_ptr->get_down_child()); + } + stack.push_front(node_ptr->get_up_child()); } else { - stack.push_front(node_ptr->get_up_child()); - stack.push_front(node_ptr->get_down_child()); - } - } - } -} - -template -void branch_and_bound_t::best_first_thread(i_t task_id) -{ - f_t lower_bound = -inf; - f_t abs_gap = inf; - f_t rel_gap = inf; - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + } - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { - // In the current implementation, we are use the active number of subtree to decide - // when to stop the execution. We need to increment the counter at the same - // time as we pop a node from the queue to avoid some threads exiting - // the main loop thinking that the solver has already finished. - // This will be not needed in the master-worker model. - node_queue_.lock(); - // If there any node left in the heap, we pop the top node and explore it. - std::optional*> start_node = node_queue_.pop_best_first(); - if (start_node.has_value()) { active_subtrees_++; }; - node_queue_.unlock(); - - if (start_node.has_value()) { - if (upper_bound_ < start_node.value()->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - active_subtrees_--; - continue; + stack.push_front(node_ptr->get_down_child()); } - - // Best-first search with plunging - plunge_from(task_id, - start_node.value(), - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); - - active_subtrees_--; } - - lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } - is_running = false; - - // Check if it is the last thread that exited the loop and no - // timeout or numerical error has happen. - if (solver_status_ == mip_status_t::UNSET) { - if (active_subtrees_ > 0) { local_lower_bounds_[task_id] = inf; } + if (settings_.num_threads > 1) { + worker_pool_.return_worker_to_pool(worker); + active_workers_per_strategy_[BEST_FIRST]--; } } template -void branch_and_bound_t::dive_from(mip_node_t& start_node, - const std::vector& start_lower, - const std::vector& start_upper, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bnb_worker_type_t diving_type) +void branch_and_bound_t::dive_with(branch_and_bound_worker_t* worker) { logger_t log; log.log = false; - const i_t diving_node_limit = settings_.diving_settings.node_limit; - const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; - bool recompute_bounds_and_basis = true; - search_tree_t dive_tree(std::move(start_node)); + search_strategy_t search_strategy = worker->search_strategy; + const i_t diving_node_limit = settings_.diving_settings.node_limit; + const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; + + worker->recompute_basis = true; + worker->recompute_bounds = true; + + search_tree_t dive_tree(std::move(*worker->start_node)); std::deque*> stack; stack.push_front(&dive_tree.root); - bnb_stats_t dive_stats; + branch_and_bound_stats_t dive_stats; dive_stats.total_lp_iters = 0; dive_stats.total_lp_solve_time = 0; dive_stats.nodes_explored = 0; dive_stats.nodes_unexplored = 1; - while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running_) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); - f_t lower_bound = node_ptr->lower_bound; - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t lower_bound = node_ptr->lower_bound; + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + worker->lower_bound = lower_bound; if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds_and_basis = true; + worker->recompute_basis = true; + worker->recompute_bounds = true; continue; } if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } if (dive_stats.nodes_explored > diving_node_limit) { break; } - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node_ptr, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - diving_type, - recompute_bounds_and_basis, - start_lower, - start_upper, - dive_stats, - log); + dual::status_t lp_status = solve_node_lp(node_ptr, worker, dive_stats, log); if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; @@ -1444,15 +1182,10 @@ void branch_and_bound_t::dive_from(mip_node_t& start_node, ++dive_stats.nodes_explored; - auto [node_status, round_dir] = update_tree(node_ptr, - dive_tree, - leaf_problem, - leaf_solution, - leaf_edge_norms, - diving_type, - lp_status, - log); - recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + auto [node_status, round_dir] = update_tree(node_ptr, dive_tree, worker, lp_status, log); + + worker->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { if (round_dir == rounding_direction_t::UP) { @@ -1470,68 +1203,207 @@ void branch_and_bound_t::dive_from(mip_node_t& start_node, stack.pop_back(); } } + + worker_pool_.return_worker_to_pool(worker); + active_workers_per_strategy_[search_strategy]--; } template -void branch_and_bound_t::diving_thread(bnb_worker_type_t diving_type) +void branch_and_bound_t::run_scheduler() { - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - std::vector bounds_changed(original_lp_.num_cols, false); + diving_heuristics_settings_t diving_settings = settings_.diving_settings; + const i_t num_workers = 2 * settings_.num_threads; - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; + if (!std::isfinite(upper_bound_)) { diving_settings.guided_diving = false; } + std::vector strategies = get_search_strategies(diving_settings); + std::array max_num_workers_per_type = + get_max_workers(num_workers, strategies); + + worker_pool_.init(num_workers, original_lp_, Arow_, var_types_, settings_); + active_workers_per_strategy_.fill(0); + +#ifdef CUOPT_LOG_DEBUG + for (auto strategy : strategies) { + settings_.log.debug("%c%d: max num of workers = %d", + feasible_solution_symbol(strategy), + strategy, + max_num_workers_per_type[strategy]); + } +#endif + + f_t lower_bound = get_lower_bound(); + f_t abs_gap = upper_bound_ - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + i_t last_node_depth = 0; + i_t last_int_infeas = 0; + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { + bool launched_any_task = false; + lower_bound = get_lower_bound(); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + repair_heuristic_solutions(); + + // If the guided diving was disabled previously due to the lack of an incumbent solution, + // re-enable as soon as a new incumbent is found. + if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { + if (std::isfinite(upper_bound_)) { + diving_settings.guided_diving = settings_.diving_settings.guided_diving; + strategies = get_search_strategies(diving_settings); + max_num_workers_per_type = get_max_workers(num_workers, strategies); + +#ifdef CUOPT_LOG_DEBUG + for (auto type : strategies) { + settings_.log.debug("%c%d: max num of workers = %d", + feasible_solution_symbol(type), + type, + max_num_workers_per_type[type]); + } +#endif + } + } - std::vector start_lower; - std::vector start_upper; - bool reset_starting_bounds = true; - - while (solver_status_ == mip_status_t::UNSET && is_running && - (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { - if (reset_starting_bounds) { - start_lower = original_lp_.lower; - start_upper = original_lp_.upper; - std::fill(bounds_changed.begin(), bounds_changed.end(), false); - reset_starting_bounds = false; + f_t now = toc(exploration_stats_.start_time); + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + i_t queue_size = node_queue_.best_first_queue_size(); + i_t depth = queue_size > 0 ? node_queue_.bfs_top()->depth : last_node_depth; + i_t int_infeas = queue_size > 0 ? node_queue_.bfs_top()->integer_infeasible : last_int_infeas; + report(' ', upper_bound_, lower_bound, depth, int_infeas); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; } - // In the current implementation, multiple threads can pop the nodes - // from the queue, so we need to initialize the lower and upper bound here - // to avoid other thread fathoming the node (i.e., deleting) before we can read - // the variable bounds from the tree. - // This will be not needed in the master-worker model. - node_queue_.lock(); - std::optional*> node_ptr = node_queue_.pop_diving(); - std::optional> start_node = std::nullopt; - - if (node_ptr.has_value()) { - node_ptr.value()->get_variable_bounds(start_lower, start_upper, bounds_changed); - start_node = node_ptr.value()->detach_copy(); + if (now > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; } - node_queue_.unlock(); - if (start_node.has_value()) { - reset_starting_bounds = true; + for (auto strategy : strategies) { + if (active_workers_per_strategy_[strategy] >= max_num_workers_per_type[strategy]) { + continue; + } + + // Get an idle worker. + branch_and_bound_worker_t* worker = worker_pool_.get_idle_worker(); + if (worker == nullptr) { break; } + + if (strategy == BEST_FIRST) { + // If there any node left in the heap, we pop the top node and explore it. + std::optional*> start_node = node_queue_.pop_best_first(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); + continue; + } - if (upper_bound_ < start_node->lower_bound) { continue; } - bool is_feasible = - node_presolver.bounds_strengthening(settings_, bounds_changed, start_lower, start_upper); - if (!is_feasible) { continue; } - - dive_from(start_node.value(), - start_lower, - start_upper, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list, - diving_type); + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + worker->init_best_first(start_node.value(), original_lp_); + last_node_depth = start_node.value()->depth; + last_int_infeas = start_node.value()->integer_infeasible; + active_workers_per_strategy_[strategy]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + plunge_with(worker); + + } else { + std::optional*> start_node = node_queue_.pop_diving(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound || + start_node.value()->depth < diving_settings.min_node_depth) { + continue; + } + + bool is_feasible = + worker->init_diving(start_node.value(), strategy, original_lp_, settings_); + if (!is_feasible) { continue; } + + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + active_workers_per_strategy_[strategy]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + dive_with(worker); + } } + + // If no new task was launched in this iteration, suspend temporarily the + // execution of the scheduler. As of 8/Jan/2026, GCC does not + // implement taskyield, but LLVM does. + if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + } +} + +template +void branch_and_bound_t::single_threaded_solve() +{ + branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); + + f_t lower_bound = get_lower_bound(); + f_t abs_gap = upper_bound_ - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { + bool launched_any_task = false; + lower_bound = get_lower_bound(); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + repair_heuristic_solutions(); + + f_t now = toc(exploration_stats_.start_time); + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + i_t depth = node_queue_.bfs_top()->depth; + i_t int_infeas = node_queue_.bfs_top()->integer_infeasible; + report(' ', upper_bound_, lower_bound, depth, int_infeas); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; + } + + if (now > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } + + // If there any node left in the heap, we pop the top node and explore it. + std::optional*> start_node = node_queue_.pop_best_first(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); + continue; + } + + worker.init_best_first(start_node.value(), original_lp_); + plunge_with(&worker); } } @@ -1675,7 +1547,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( solver_name.c_str()); settings_.log.printf("Root relaxation objective %+.8e\n", user_objective); } else { - settings_.log.printf("Root relaxation returned status: %s\n", + settings_.log.printf("Root relaxation returned: %s\n", lp_status_to_string(root_status).c_str()); } @@ -1692,14 +1564,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut log.log = false; log.log_prefix = settings_.log.log_prefix; solver_status_ = mip_status_t::UNSET; - is_running = false; + is_running_ = false; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; original_lp_.A.to_compressed_row(Arow_); - std::vector diving_strategies; - initialize_diving_heuristics_settings(diving_strategies); - if (guess_.size() != 0) { std::vector crushed_guess; crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess); @@ -1728,6 +1597,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector nonbasic_list; basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t root_status; + if (!enable_concurrent_lp_root_solve()) { // RINS/SUBMIP path settings_.log.printf("\nSolving LP root relaxation with dual simplex\n"); @@ -1775,6 +1645,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_final_solution(solution, -inf); return solver_status_; } + if (root_status == lp_status_t::NUMERICAL_ISSUES) { solver_status_ = mip_status_t::NUMERICAL; set_final_solution(solution, -inf); @@ -1785,7 +1656,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -1806,12 +1676,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); cut_info_t cut_info; + if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; } - is_running = true; + is_running_ = true; lower_bound_ceiling_ = inf; if (num_fractional != 0 && settings_.max_cut_passes > 0) { @@ -2026,8 +1897,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); - f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); remove_cuts(original_lp_, @@ -2171,58 +2040,39 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, original_lp_, log); + node_queue_.push(search_tree_.root.get_down_child()); + node_queue_.push(search_tree_.root.get_up_child()); - settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n\n", - settings_.num_threads, - settings_.num_bfs_workers, - settings_.num_threads - settings_.num_bfs_workers); + settings_.log.printf("Exploring the B&B tree using %d threads\n\n", settings_.num_threads); exploration_stats_.nodes_explored = 0; exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); - active_subtrees_ = 0; - lower_bound_ceiling_ = inf; - should_report_ = true; + min_node_queue_size_ = 2 * settings_.num_threads; + + if (settings_.diving_settings.coefficient_diving != 0) { + calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); + } settings_.log.printf( " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " "Gap " "| Time |\n"); + + if (settings_.num_threads > 1) { #pragma omp parallel num_threads(settings_.num_threads) - { -#pragma omp master { - auto down_child = search_tree_.root.get_down_child(); - auto up_child = search_tree_.root.get_up_child(); - i_t initial_size = 2 * settings_.num_threads; - const i_t num_strategies = diving_strategies.size(); - -#pragma omp taskgroup - { -#pragma omp task - exploration_ramp_up(down_child, initial_size); - -#pragma omp task - exploration_ramp_up(up_child, initial_size); - } - - for (i_t i = 0; i < settings_.num_bfs_workers; i++) { -#pragma omp task - best_first_thread(i); - } - - if (!diving_strategies.empty()) { - for (i_t k = 0; k < settings_.diving_settings.num_diving_workers; k++) { - const bnb_worker_type_t diving_type = diving_strategies[k % num_strategies]; -#pragma omp task - diving_thread(diving_type); - } - } +#pragma omp master + run_scheduler(); } + + } else { + single_threaded_solve(); } - is_running = false; + is_running_ = false; + f_t lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() : search_tree_.root.lower_bound; set_final_solution(solution, lower_bound); diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 6db45e153..1d6947681 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -37,38 +38,9 @@ enum class mip_status_t { UNSET = 6, // The status is not set }; -// Indicate the search and variable selection algorithms used by each thread -// in B&B (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum class bnb_worker_type_t { - BEST_FIRST = 0, // Best-First + Plunging. - PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) - LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) - GUIDED_DIVING = 3, // Guided diving (9.2.3). If no incumbent is found yet, use pseudocost diving. - COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) -}; - -template -class bounds_strengthening_t; - template void upper_bound_callback(f_t upper_bound); -template -struct bnb_stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; -}; - template class branch_and_bound_t { public: @@ -153,9 +125,6 @@ class branch_and_bound_t { std::vector var_up_locks_; std::vector var_down_locks_; - // Local lower bounds for each thread - std::vector> local_lower_bounds_; - // Mutex for the original LP // The heuristics threads look at the original LP. But the main thread modifies the // size of the original LP by adding slacks for cuts. Heuristic threads should lock @@ -173,7 +142,7 @@ class branch_and_bound_t { mip_solution_t incumbent_; // Structure with the general info of the solver. - bnb_stats_t exploration_stats_; + branch_and_bound_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -200,14 +169,21 @@ class branch_and_bound_t { // Search tree search_tree_t search_tree_; - // Count the number of subtrees that are currently being explored. - omp_atomic_t active_subtrees_; + // Count the number of workers per type that either are being executed or + // are waiting to be executed. + std::array, num_search_strategies> active_workers_per_strategy_; + + // Worker pool + branch_and_bound_worker_pool_t worker_pool_; // Global status of the solver. omp_atomic_t solver_status_; - omp_atomic_t is_running{false}; + omp_atomic_t is_running_{false}; - omp_atomic_t should_report_; + // Minimum number of node in the queue. When the queue size is less than + // this variable, the nodes are added directly to the queue instead of + // the local stack. This also determines the end of the ramp-up phase. + i_t min_node_queue_size_; // In case, a best-first thread encounters a numerical issue when solving a node, // its blocks the progression of the lower bound. @@ -230,82 +206,48 @@ class branch_and_bound_t { void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - bnb_worker_type_t thread_type); + search_strategy_t thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - // Initialize diving heuristics settings - void initialize_diving_heuristics_settings(std::vector& diving_strategies); - - // Ramp-up phase of the solver, where we greedily expand the tree until - // there is enough unexplored nodes. This is done recursively using OpenMP tasks. - void exploration_ramp_up(mip_node_t* node, i_t initial_heap_size); - // We use best-first to pick the `start_node` and then perform a depth-first search // from this node (i.e., a plunge). It can only backtrack to a sibling node. // Unexplored nodes in the subtree are inserted back into the global heap. - void plunge_from(i_t task_id, - mip_node_t* start_node, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); - - // Each "main" thread pops a node from the global heap and then performs a plunge - // (i.e., a shallow dive) into the subtree determined by the node. - void best_first_thread(i_t task_id); + void plunge_with(branch_and_bound_worker_t* worker); // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. - void dive_from(mip_node_t& start_node, - const std::vector& start_lower, - const std::vector& start_upper, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - bnb_worker_type_t diving_type); - - // Each diving thread pops the first node from the dive queue and then performs - // a deep dive into the subtree determined by the node. - void diving_thread(bnb_worker_type_t diving_type); + void dive_with(branch_and_bound_worker_t* worker); + + // Run the scheduler whose will schedule and manage + // all the other workers. + void run_scheduler(); + + // Run the branch-and-bound algorithm in single threaded mode. + // This disable all diving heuristics. + void single_threaded_solve(); // Solve the LP relaxation of a leaf node dual::status_t solve_node_lp(mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - bnb_worker_type_t thread_type, - bool recompute_bounds_and_basis, - const std::vector& root_lower, - const std::vector& root_upper, - bnb_stats_t& stats, + branch_and_bound_worker_t* worker, + branch_and_bound_stats_t& stats, logger_t& log); // Update the tree based on the LP relaxation. Returns the status // of the node and, if appropriated, the preferred rounding direction // when visiting the children. - std::pair update_tree(mip_node_t* node_ptr, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - bnb_worker_type_t thread_type, - dual::status_t lp_status, - logger_t& log); + std::pair update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + branch_and_bound_worker_t* worker, + dual::status_t lp_status, + logger_t& log); // Selects the variable to branch on. branch_variable_t variable_selection(mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - bnb_worker_type_t type); + branch_and_bound_worker_t* worker); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/branch_and_bound_worker.hpp b/cpp/src/dual_simplex/branch_and_bound_worker.hpp new file mode 100644 index 000000000..99a59a871 --- /dev/null +++ b/cpp/src/dual_simplex/branch_and_bound_worker.hpp @@ -0,0 +1,280 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int num_search_strategies = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum search_strategy_t : int { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +template +struct branch_and_bound_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + omp_atomic_t nodes_since_last_log = 0; + omp_atomic_t last_log = 0.0; +}; + +template +class branch_and_bound_worker_t { + public: + const i_t worker_id; + omp_atomic_t search_strategy; + omp_atomic_t is_active; + omp_atomic_t lower_bound; + + lp_problem_t leaf_problem; + lp_solution_t leaf_solution; + std::vector leaf_edge_norms; + + basis_update_mpf_t basis_factors; + std::vector basic_list; + std::vector nonbasic_list; + + bounds_strengthening_t node_presolver; + std::vector bounds_changed; + + std::vector start_lower; + std::vector start_upper; + mip_node_t* start_node; + + pcgenerator_t rng; + + bool recompute_basis = true; + bool recompute_bounds = true; + + branch_and_bound_worker_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + : worker_id(worker_id), + search_strategy(BEST_FIRST), + is_active(false), + lower_bound(-std::numeric_limits::infinity()), + leaf_problem(original_lp), + leaf_solution(original_lp.num_rows, original_lp.num_cols), + basis_factors(original_lp.num_rows, settings.refactor_frequency), + basic_list(original_lp.num_rows), + nonbasic_list(), + node_presolver(leaf_problem, Arow, {}, var_type), + bounds_changed(original_lp.num_cols, false), + rng(settings.random_seed + pcgenerator_t::default_seed + worker_id, + pcgenerator_t::default_stream ^ worker_id) + { + } + + // Set the `start_node` for best-first search. + void init_best_first(mip_node_t* node, const lp_problem_t& original_lp) + { + start_node = node; + start_lower = original_lp.lower; + start_upper = original_lp.upper; + search_strategy = BEST_FIRST; + lower_bound = node->lower_bound; + is_active = true; + } + + // Initialize the worker for diving, setting the `start_node`, `start_lower` and + // `start_upper`. Returns `true` if the starting node is feasible via + // bounds propagation. + bool init_diving(mip_node_t* node, + search_strategy_t type, + const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings) + { + internal_node = node->detach_copy(); + start_node = &internal_node; + + start_lower = original_lp.lower; + start_upper = original_lp.upper; + search_strategy = type; + lower_bound = node->lower_bound; + is_active = true; + + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + node->get_variable_bounds(start_lower, start_upper, bounds_changed); + return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); + } + + // Set the variables bounds for the LP relaxation of the current node. + bool set_lp_variable_bounds(mip_node_t* node_ptr, + const simplex_solver_settings_t& settings) + { + // Reset the bound_changed markers + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + + // Set the correct bounds for the leaf problem + if (recompute_bounds) { + leaf_problem.lower = start_lower; + leaf_problem.upper = start_upper; + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + } else { + node_ptr->update_branched_variable_bounds( + leaf_problem.lower, leaf_problem.upper, bounds_changed); + } + + return node_presolver.bounds_strengthening( + settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); + } + + private: + // For diving, we need to store the full node instead of + // of just a pointer, since it is not stored in the tree anymore. + // To keep the same interface across all worker types, + // this will be used as a temporary storage and + // will be pointed by `start_node`. + // For exploration, this will not be used. + mip_node_t internal_node; +}; + +template +class branch_and_bound_worker_pool_t { + public: + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + { + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = std::make_unique>( + i, original_lp, Arow, var_type, settings); + idle_workers_.push_front(i); + } + + is_initialized = true; + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + branch_and_bound_worker_t* get_idle_worker() + { + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + return workers_[idx].get(); + } + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + void pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (!idle_workers_.empty()) { + idle_workers_.pop_front(); + num_idle_workers_--; + } + } + + void return_worker_to_pool(branch_and_bound_worker_t* worker) + { + worker->is_active = false; + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + } + + return lower_bound; + } + + i_t num_idle_workers() { return num_idle_workers_; } + + private: + // Worker pool + std::vector>> workers_; + bool is_initialized = false; + + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; +}; + +template +std::vector get_search_strategies( + diving_heuristics_settings_t settings) +{ + std::vector types; + types.reserve(num_search_strategies); + types.push_back(BEST_FIRST); + if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } + if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } + if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } + if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } + return types; +} + +template +std::array get_max_workers( + i_t num_workers, const std::vector& strategies) +{ + std::array max_num_workers; + max_num_workers.fill(0); + + i_t bfs_workers = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1); + max_num_workers[BEST_FIRST] = bfs_workers; + + i_t diving_workers = (num_workers - bfs_workers); + i_t m = strategies.size() - 1; + + for (size_t i = 1, k = 0; i < strategies.size(); ++i) { + i_t start = (double)k * diving_workers / m; + i_t end = (double)(k + 1) * diving_workers / m; + max_num_workers[strategies[i]] = end - start; + ++k; + } + + return max_num_workers; +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_heuristics.cpp b/cpp/src/dual_simplex/diving_heuristics.cpp index a56b4cce3..b3e91bc09 100644 --- a/cpp/src/dual_simplex/diving_heuristics.cpp +++ b/cpp/src/dual_simplex/diving_heuristics.cpp @@ -71,9 +71,8 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& root_solution, logger_t& log) { - std::lock_guard lock(pc.mutex); i_t branch_var = -1; - f_t max_score = std::numeric_limits::lowest(); + f_t max_score = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; constexpr f_t eps = 1e-6; @@ -127,6 +126,15 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, } } + // If we cannot choose the variable, then arbitrarily pick the first + // fractional variable and round it down. This only happens when + // there is only one fractional variable and its the pseudocost is + // infinite for both direction. + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + assert(round_dir != rounding_direction_t::NONE); assert(branch_var >= 0); @@ -146,9 +154,8 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& incumbent, logger_t& log) { - std::lock_guard lock(pc.mutex); i_t branch_var = -1; - f_t max_score = std::numeric_limits::lowest(); + f_t max_score = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; constexpr f_t eps = 1e-6; diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 5ee4f49d1..8b2211c1d 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -227,18 +227,26 @@ class mip_node_t { } // This method creates a copy of the current node - // with its parent set to `nullptr` and `depth = 0`. + // with its parent set to `nullptr` // This detaches the node from the tree. mip_node_t detach_copy() const { - mip_node_t copy(lower_bound, vstatus); + mip_node_t copy; + copy.lower_bound = lower_bound; + copy.objective_estimate = objective_estimate; + copy.depth = depth; + copy.node_id = node_id; + copy.integer_infeasible = integer_infeasible; + copy.vstatus = vstatus; copy.branch_var = branch_var; copy.branch_dir = branch_dir; copy.branch_var_lower = branch_var_lower; copy.branch_var_upper = branch_var_upper; copy.fractional_val = fractional_val; - copy.objective_estimate = objective_estimate; - copy.node_id = node_id; + copy.parent = nullptr; + copy.children[0] = nullptr; + copy.children[1] = nullptr; + copy.status = node_status_t::PENDING; return copy; } diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp index 28072795a..b5c9a9ea7 100644 --- a/cpp/src/dual_simplex/node_queue.hpp +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -114,26 +114,29 @@ class node_queue_t { std::optional*> pop_best_first() { + std::lock_guard lock(mutex); auto entry = best_first_heap.pop(); + if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } + return std::nullopt; } std::optional*> pop_diving() { + std::lock_guard lock(mutex); + while (!diving_heap.empty()) { auto entry = diving_heap.pop(); + if (entry.has_value()) { if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } } } + return std::nullopt; } - void lock() { mutex.lock(); } - - void unlock() { mutex.unlock(); } - i_t diving_queue_size() { std::lock_guard lock(mutex); diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 978896887..d104036ea 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -619,7 +619,7 @@ void convert_user_problem(const user_problem_t& user_problem, } constexpr bool run_bounds_strengthening = false; - if (run_bounds_strengthening) { + if constexpr (run_bounds_strengthening) { csr_matrix_t Arow(1, 1, 1); problem.A.to_compressed_row(Arow); diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 015034b77..682bdaa6f 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -88,7 +88,7 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = obj - root_obj; + pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); if (verbose) { settings.log.printf("Thread id %2d remaining %d variable %d branch %d obj %e time %.2f\n", thread_id, @@ -99,7 +99,7 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = obj - root_obj; + pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); if (verbose) { settings.log.printf( "Thread id %2d remaining %d variable %d branch %d obj %e change down %e change up %e " @@ -135,6 +135,83 @@ void strong_branch_helper(i_t start, } } +template +f_t trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + omp_atomic_t& total_lp_iter) +{ + lp_problem_t child_problem = original_lp; + child_problem.lower[branch_var] = branch_var_lower; + child_problem.upper[branch_var] = branch_var_upper; + + const bool initialize_basis = false; + simplex_solver_settings_t child_settings = settings; + child_settings.set_log(false); + i_t lp_iter_upper = upper_max_lp_iter; + i_t lp_iter_lower = lower_max_lp_iter; + child_settings.iteration_limit = std::clamp(bnb_lp_iter_per_node, lp_iter_lower, lp_iter_upper); + child_settings.cut_off = upper_bound + settings.dual_tol; + child_settings.inside_mip = 2; + child_settings.scale_columns = false; + + lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); + i_t iter = 0; + std::vector child_vstatus = vstatus; + std::vector child_edge_norms = edge_norms; + std::vector child_basic_list = basic_list; + std::vector child_nonbasic_list = nonbasic_list; + basis_update_mpf_t child_basis_factors = basis_factors; + + // Only refactor the basis if we encounter numerical issues. + child_basis_factors.set_refactor_frequency(upper_max_lp_iter); + + dual::status_t status = dual_phase2_with_advanced_basis(2, + 0, + initialize_basis, + start_time, + child_problem, + child_settings, + child_vstatus, + child_basis_factors, + child_basic_list, + child_nonbasic_list, + solution, + iter, + child_edge_norms); + total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n", + branch_var, + child_problem.lower[branch_var], + child_problem.upper[branch_var], + iter, + dual::status_to_string(status).c_str(), + compute_objective(child_problem, solution.x)); + + if (status == dual::status_t::DUAL_UNBOUNDED) { + // LP was infeasible + return std::numeric_limits::infinity(); + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { + return compute_objective(child_problem, solution.x); + } else { + return std::numeric_limits::quiet_NaN(); + } +} + } // namespace template @@ -350,15 +427,31 @@ void strong_branching(const user_problem_t& original_problem, pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); } +template +f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, + const std::vector& solution, + f_t pseudo_cost_up_avg, + f_t pseudo_cost_down_avg) const +{ + constexpr f_t eps = 1e-6; + i_t num_up = pseudo_cost_num_up[j]; + i_t num_down = pseudo_cost_num_down[j]; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); +} + template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - std::lock_guard lock(mutex); - const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; + const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; + if (node_ptr->branch_dir == rounding_direction_t::DOWN) { pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_down[node_ptr->branch_var]++; @@ -412,13 +505,55 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const std::vector& solution, logger_t& log) { - std::lock_guard lock(mutex); + i_t branch_var = fractional[0]; + f_t max_score = -1; + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; - const i_t num_fractional = fractional.size(); - std::vector pseudo_cost_up(num_fractional); - std::vector pseudo_cost_down(num_fractional); - std::vector score(num_fractional); + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + log.printf("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); + + for (i_t j : fractional) { + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + if (score > max_score) { + max_score = score; + branch_var = j; + } + } + + log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", + branch_var, + solution[branch_var], + max_score); + + return branch_var; +} + +template +i_t pseudo_costs_t::reliable_variable_selection( + mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + branch_and_bound_worker_t* worker, + const branch_and_bound_stats_t& bnb_stats, + f_t upper_bound, + int max_num_tasks, + logger_t& log) +{ + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; i_t num_initialized_down; i_t num_initialized_up; f_t pseudo_cost_down_avg; @@ -432,39 +567,162 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio pseudo_cost_down_avg, pseudo_cost_up_avg); - for (i_t k = 0; k < num_fractional; k++) { - const i_t j = fractional[k]; - if (pseudo_cost_num_down[j] != 0) { - pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } else { - pseudo_cost_down[k] = pseudo_cost_down_avg; + const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; + const int64_t branch_and_bound_explored = bnb_stats.nodes_explored; + const i_t branch_and_bound_lp_iter_per_node = + branch_and_bound_lp_iters / bnb_stats.nodes_explored; + + i_t reliable_threshold = settings.reliability_branching; + if (reliable_threshold < 0) { + const i_t max_threshold = reliability_branching_settings.max_reliable_threshold; + const i_t min_threshold = reliability_branching_settings.min_reliable_threshold; + const f_t iter_factor = reliability_branching_settings.bnb_lp_factor; + const i_t iter_offset = reliability_branching_settings.bnb_lp_offset; + const int64_t alpha = iter_factor * branch_and_bound_lp_iters; + const int64_t max_reliability_iter = alpha + reliability_branching_settings.bnb_lp_offset; + + f_t iter_fraction = + (max_reliability_iter - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0); + iter_fraction = std::min(1.0, iter_fraction); + iter_fraction = std::max((alpha - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0), + iter_fraction); + reliable_threshold = (1 - iter_fraction) * min_threshold + iter_fraction * max_threshold; + reliable_threshold = strong_branching_lp_iter < max_reliability_iter ? reliable_threshold : 0; + } + + std::vector unreliable_list; + omp_mutex_t score_mutex; + + for (i_t j : fractional) { + if (pseudo_cost_num_down[j] < reliable_threshold || + pseudo_cost_num_up[j] < reliable_threshold) { + unreliable_list.push_back(j); + continue; } - if (pseudo_cost_num_up[j] != 0) { - pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } else { - pseudo_cost_up[k] = pseudo_cost_up_avg; + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + if (score > max_score) { + max_score = score; + branch_var = j; } - constexpr f_t eps = 1e-6; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - score[k] = - std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps); } - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t select = -1; - for (i_t k = 0; k < num_fractional; k++) { - if (score[k] > max_score) { - max_score = score[k]; - branch_var = fractional[k]; - select = k; + if (unreliable_list.empty()) { + log.printf( + "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); + + return branch_var; + } + + const int num_tasks = std::max(max_num_tasks, 1); + const int task_priority = reliability_branching_settings.task_priority; + const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; + const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); + + assert(task_priority > 0); + assert(max_num_candidates > 0); + assert(num_candidates > 0); + assert(num_tasks > 0); + + log.printf( + "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", + strong_branching_lp_iter.load(), + branch_and_bound_lp_iters, + unreliable_list.size(), + num_tasks, + reliable_threshold); + + // Shuffle the unreliable list so every variable has the same chance to be selected. + if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } + + if (toc(start_time) > settings.time_limit) { + log.printf("Time limit reached"); + return branch_var; + } + +#pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ + shared(score_mutex) + for (i_t i = 0; i < num_candidates; ++i) { + const i_t j = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + f_t obj = trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + worker->leaf_problem.lower[j], + std::floor(solution[j]), + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + } } + pseudo_cost_mutex_down[j].unlock(); + + if (toc(start_time) > settings.time_limit) { continue; } + + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t obj = trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + std::ceil(solution[j]), + worker->leaf_problem.upper[j], + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + } + } + pseudo_cost_mutex_up[j].unlock(); + + if (toc(start_time) > settings.time_limit) { continue; } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; + } + score_mutex.unlock(); } log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); + "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); return branch_var; } @@ -475,8 +733,6 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, f_t lower_bound, logger_t& log) { - std::lock_guard lock(mutex); - const i_t num_fractional = fractional.size(); f_t estimate = lower_bound; @@ -487,26 +743,15 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - for (i_t k = 0; k < num_fractional; k++) { - const i_t j = fractional[k]; - f_t pseudo_cost_down = 0; - f_t pseudo_cost_up = 0; - - if (pseudo_cost_num_down[j] != 0) { - pseudo_cost_down = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } else { - pseudo_cost_down = pseudo_cost_down_avg; - } - - if (pseudo_cost_num_up[j] != 0) { - pseudo_cost_up = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } else { - pseudo_cost_up = pseudo_cost_up_avg; - } + for (i_t j : fractional) { constexpr f_t eps = 1e-6; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - estimate += std::min(pseudo_cost_down * f_down, pseudo_cost_up * f_up); + i_t num_up = pseudo_cost_num_up[j]; + i_t num_down = pseudo_cost_num_down[j]; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + estimate += std::min(pc_down * f_down, pc_up * f_up); } log.printf("pseudocost estimate = %e\n", estimate); diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 4bdde710f..1d3b0deef 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -7,16 +7,52 @@ #pragma once +#include +#include #include #include #include #include #include +#include #include namespace cuopt::linear_programming::dual_simplex { +template +struct reliability_branching_settings_t { + // Lower bound for the maximum number of LP iterations for a single trial branching + i_t lower_max_lp_iter = 10; + + // Upper bound for the maximum number of LP iterations for a single trial branching + i_t upper_max_lp_iter = 500; + + // Priority of the tasks created when running the trial branching in parallel. + // Set to 1 to have the same priority as the other tasks. + i_t task_priority = 5; + + // The maximum number of candidates initialized by strong branching in a single + // node + i_t max_num_candidates = 100; + + // Define the maximum number of iteration spent in strong branching. + // Let `bnb_lp_iter` = total number of iterations in B&B, then + // `max iter in strong branching = bnb_lp_factor * bnb_lp_iter + bnb_lp_offset`. + // This is used for determining the `reliable_threshold`. + f_t bnb_lp_factor = 0.5; + i_t bnb_lp_offset = 100000; + + // Maximum and minimum points in curve to determine the value + // of the `reliable_threshold` based on the current number of LP + // iterations in strong branching and B&B. Since it is a + // a curve, the actual value of `reliable_threshold` may be + // higher than `max_reliable_threshold`. + // Only used when `reliable_threshold` is negative + i_t max_reliable_threshold = 5; + i_t min_reliable_threshold = 1; +}; + template class pseudo_costs_t { public: @@ -24,7 +60,9 @@ class pseudo_costs_t { : pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), pseudo_cost_num_down(num_variables), - pseudo_cost_num_up(num_variables) + pseudo_cost_num_up(num_variables), + pseudo_cost_mutex_up(num_variables), + pseudo_cost_mutex_down(num_variables) { } @@ -36,6 +74,8 @@ class pseudo_costs_t { pseudo_cost_sum_up.assign(num_variables, 0); pseudo_cost_num_down.assign(num_variables, 0); pseudo_cost_num_up.assign(num_variables, 0); + pseudo_cost_mutex_up.resize(num_variables); + pseudo_cost_mutex_down.resize(num_variables); } void initialized(i_t& num_initialized_down, @@ -52,17 +92,37 @@ class pseudo_costs_t { const std::vector& solution, logger_t& log); + i_t reliable_variable_selection(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + branch_and_bound_worker_t* worker, + const branch_and_bound_stats_t& bnb_stats, + f_t upper_bound, + int max_num_tasks, + logger_t& log); + void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); - std::vector pseudo_cost_sum_up; - std::vector pseudo_cost_sum_down; - std::vector pseudo_cost_num_up; - std::vector pseudo_cost_num_down; + + f_t calculate_pseudocost_score(i_t j, + const std::vector& solution, + f_t pseudo_cost_up_avg, + f_t pseudo_cost_down_avg) const; + + reliability_branching_settings_t reliability_branching_settings; + + std::vector> pseudo_cost_sum_up; + std::vector> pseudo_cost_sum_down; + std::vector> pseudo_cost_num_up; + std::vector> pseudo_cost_num_down; std::vector strong_branch_down; std::vector strong_branch_up; - - omp_mutex_t mutex; + std::vector pseudo_cost_mutex_up; + std::vector pseudo_cost_mutex_down; omp_atomic_t num_strong_branches_completed = 0; + omp_atomic_t strong_branching_lp_iter = 0; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index f9911ee53..34c384949 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -22,18 +22,25 @@ namespace cuopt::linear_programming::dual_simplex { template struct diving_heuristics_settings_t { - i_t num_diving_workers = -1; - // -1 automatic, 0 disabled, 1 enabled i_t line_search_diving = -1; i_t pseudocost_diving = -1; i_t guided_diving = -1; i_t coefficient_diving = -1; - i_t min_node_depth = 10; - i_t node_limit = 500; + // The minimum depth to start diving from. + i_t min_node_depth = 10; + + // The maximum number of nodes when performing a dive. + i_t node_limit = 500; + + // The maximum number of dual simplex iteration allowed + // in a single dive. This set in terms of the total number of + // iterations in the best-first threads. f_t iteration_limit_factor = 0.05; - i_t backtrack_limit = 5; + + // The maximum backtracking allowed. + i_t backtrack_limit = 5; }; template @@ -95,16 +102,14 @@ struct simplex_solver_settings_t { reduced_cost_strengthening(-1), cut_change_threshold(1e-3), cut_min_orthogonality(0.5), - num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), + reliability_branching(-1), inside_mip(0), sub_mip(0), - reliability_branching(-1), solution_callback(nullptr), heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) { - diving_settings.num_diving_workers = std::max(num_threads - num_bfs_workers, 1); } void set_log(bool logging) const { log.log = logging; } @@ -175,15 +180,20 @@ struct simplex_solver_settings_t { // strengthening f_t cut_change_threshold; // threshold for cut change f_t cut_min_orthogonality; // minimum orthogonality for cuts - i_t num_bfs_workers; // number of threads dedicated to the best-first search i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if // using batch PDLP for strong branching diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + // Settings for the reliability branching. + // - -1: automatic + // - 0: disable (use pseudocost branching instead) + // - k > 0, a variable is considered reliable if it has been branched on k times. + i_t reliability_branching; + i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node i_t sub_mip; // 0 if in regular MIP solve, 1 if in sub-MIP solve - i_t reliability_branching; // -1 automatic, 0 to disable, >0 to enable reliability branching + std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp index a678e2fd7..5739cedae 100644 --- a/cpp/src/dual_simplex/solution.hpp +++ b/cpp/src/dual_simplex/solution.hpp @@ -72,8 +72,8 @@ class mip_solution_t { std::vector x; f_t objective; f_t lower_bound; - i_t nodes_explored; - i_t simplex_iterations; + int64_t nodes_explored; + int64_t simplex_iterations; bool has_incumbent; }; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 9dc6ac9c5..0858bb75a 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -98,6 +98,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}, {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, + {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1} }; // Bool parameters diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index cb086e145..7b05f1964 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -259,20 +259,13 @@ void rins_t::run_rins() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = std::min(current_mip_gap, (f_t)settings.target_mip_gap); - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_workers = 1; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.sub_mip = 1; - - // In the future, let RINS use all the diving heuristics. For now, - // restricting to guided diving. - branch_and_bound_settings.diving_settings.num_diving_workers = 1; - branch_and_bound_settings.diving_settings.line_search_diving = 0; - branch_and_bound_settings.diving_settings.coefficient_diving = 0; - branch_and_bound_settings.diving_settings.pseudocost_diving = 0; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 1; + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index e636e7471..6e1c4d4ae 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -104,19 +104,12 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_workers = 1; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.sub_mip = 1; - - // In the future, let SubMIP use all the diving heuristics. For now, - // restricting to guided diving. - branch_and_bound_settings.diving_settings.num_diving_workers = 1; - branch_and_bound_settings.diving_settings.line_search_diving = 0; - branch_and_bound_settings.diving_settings.coefficient_diving = 0; - branch_and_bound_settings.diving_settings.pseudocost_diving = 0; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 1; + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 2829babc9..8583a76f8 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -180,9 +180,10 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; - branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.reliability_branching = solver_settings_.reliability_branching; + branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; + branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; branch_and_bound_settings.mixed_integer_gomory_cuts = context.settings.mixed_integer_gomory_cuts; branch_and_bound_settings.knapsack_cuts = context.settings.knapsack_cuts; @@ -192,21 +193,15 @@ solution_t mip_solver_t::run_solver() context.settings.reduced_cost_strengthening; branch_and_bound_settings.cut_change_threshold = context.settings.cut_change_threshold; branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; + branch_and_bound_settings.mip_batch_pdlp_strong_branching = + context.settings.mip_batch_pdlp_strong_branching; if (context.settings.num_cpu_threads < 0) { - branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; + branch_and_bound_settings.num_threads = std::max(1, omp_get_max_threads() - 1); } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } - i_t num_threads = branch_and_bound_settings.num_threads; - i_t num_bfs_workers = std::max(1, num_threads / 4); - i_t num_diving_workers = std::max(1, num_threads - num_bfs_workers); - branch_and_bound_settings.num_bfs_workers = num_bfs_workers; - branch_and_bound_settings.diving_settings.num_diving_workers = num_diving_workers; - branch_and_bound_settings.mip_batch_pdlp_strong_branching = - context.settings.mip_batch_pdlp_strong_branching; - // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = std::bind(&branch_and_bound_solution_helper_t::solution_callback, diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index e1b68bf88..f6e66472d 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -10,7 +10,8 @@ #ifdef _OPENMP #include -#include +#include +#include namespace cuopt { @@ -18,14 +19,39 @@ namespace cuopt { // https://www.openmp.org/spec-html/5.1/openmpse39.html#x224-2570003.9 class omp_mutex_t { public: - omp_mutex_t() { omp_init_lock(&mutex); } - virtual ~omp_mutex_t() { omp_destroy_lock(&mutex); } - void lock() { omp_set_lock(&mutex); } - void unlock() { omp_unset_lock(&mutex); } - bool try_lock() { return omp_test_lock(&mutex); } + omp_mutex_t() : mutex(new omp_lock_t) { omp_init_lock(mutex.get()); } + + omp_mutex_t(const omp_mutex_t&) = delete; + + omp_mutex_t(omp_mutex_t&& other) { *this = std::move(other); } + + omp_mutex_t& operator=(const omp_mutex_t&) = delete; + + omp_mutex_t& operator=(omp_mutex_t&& other) + { + if (&other != this) { + if (mutex) { omp_destroy_lock(mutex.get()); } + mutex = std::move(other.mutex); + } + return *this; + } + + virtual ~omp_mutex_t() + { + if (mutex) { + omp_destroy_lock(mutex.get()); + mutex.reset(); + } + } + + void lock() { omp_set_lock(mutex.get()); } + + void unlock() { omp_unset_lock(mutex.get()); } + + bool try_lock() { return omp_test_lock(mutex.get()); } private: - omp_lock_t mutex; + std::unique_ptr mutex; }; // Wrapper for omp atomic operations. See @@ -42,7 +68,7 @@ class omp_atomic_t { return new_val; } - operator T() { return load(); } + operator T() const { return load(); } T operator+=(T inc) { return fetch_add(inc) + inc; } T operator-=(T inc) { return fetch_sub(inc) - inc; } diff --git a/cpp/src/utilities/pcgenerator.hpp b/cpp/src/utilities/pcgenerator.hpp new file mode 100644 index 000000000..29a865f02 --- /dev/null +++ b/cpp/src/utilities/pcgenerator.hpp @@ -0,0 +1,159 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +// Copied from raft/PCGenerator (rng_device.cuh). We cannot use raft implementation +// on the CPU (.cpp file) since the raft header includes CUDA code. +// The original code is from https://www.pcg-random.org/. +namespace cuopt { +class pcgenerator_t { + public: + static constexpr uint64_t default_seed = 0x853c49e6748fea9bULL; + static constexpr uint64_t default_stream = 0xda3e39cb94b95bdbULL; + + /** + * @brief ctor. Initializes the PCG + * @param rng_state is the generator state used for initializing the generator + * @param subsequence specifies the subsequence to be generated out of 2^64 possible subsequences + * In a parallel setting, like threads of a CUDA kernel, each thread is required to generate a + * unique set of random numbers. This can be achieved by initializing the generator with same + * rng_state for all the threads and diststreamt values for subsequence. + */ + pcgenerator_t(const uint64_t seed = default_seed, + const uint64_t subsequence = default_stream, + uint64_t offset = 0) + { + set_seed(seed, subsequence, offset); + } + + // Set the seed, subsequence and offset of the PCG + void set_seed(uint64_t seed, const uint64_t subsequence = default_stream, uint64_t offset = 0) + { + state = uint64_t(0); + stream = (subsequence << 1u) | 1u; + uint32_t discard; + next(discard); + state += seed; + next(discard); + skipahead(offset); + } + + // Based on "Random Number Generation with Arbitrary Strides" F. B. Brown + // Link https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf + void skipahead(uint64_t offset) + { + uint64_t G = 1; + uint64_t h = 6364136223846793005ULL; + uint64_t C = 0; + uint64_t f = stream; + while (offset) { + if (offset & 1) { + G = G * h; + C = C * h + f; + } + f = f * (h + 1); + h = h * h; + offset >>= 1; + } + state = state * G + C; + } + + /** + * @defgroup NextRand Generate the next random number + * @brief This code is derived from PCG basic code + * @{ + */ + uint32_t next_u32() + { + uint32_t ret; + uint64_t oldstate = state; + state = oldstate * 6364136223846793005ULL + stream; + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + ret = (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); + return ret; + } + + uint64_t next_u64() + { + uint64_t ret; + uint32_t a, b; + a = next_u32(); + b = next_u32(); + ret = uint64_t(a) | (uint64_t(b) << 32); + return ret; + } + + int32_t next_i32() + { + int32_t ret; + uint32_t val; + val = next_u32(); + ret = int32_t(val & 0x7fffffff); + return ret; + } + + int64_t next_i64() + { + int64_t ret; + uint64_t val; + val = next_u64(); + ret = int64_t(val & 0x7fffffffffffffff); + return ret; + } + + float next_float() { return static_cast((next_u32() >> 8) * 0x1.0p-24); } + + double next_double() { return static_cast((next_u64() >> 11) * 0x1.0p-53); } + + template + T next() + { + T val; + next(val); + return val; + } + + void next(uint32_t& ret) { ret = next_u32(); } + void next(uint64_t& ret) { ret = next_u64(); } + void next(int32_t& ret) { ret = next_i32(); } + void next(int64_t& ret) { ret = next_i64(); } + void next(float& ret) { ret = next_float(); } + void next(double& ret) { ret = next_double(); } + + /// Draws a sample from a uniform distribution. The samples are uniformly distributed over + /// the semi-closed interval `[low, high)`. This routine may have a **slight bias** toward + /// some numbers in the range (scaling by floating-point). + template + T uniform(T low, T high) + { + double val = next_double(); + T range = high - low; + return low + (val * range); + } + + // Shuffles the contents of a sequence using the Fisher–Yates algorithm. + template + void shuffle(std::vector& seq) + { + if (seq.empty()) { return; } + for (size_t i = 0; i < seq.size() - 1; ++i) { + size_t j = uniform(i, seq.size()); + if (j != i) std::swap(seq[i], seq[j]); + } + } + + private: + uint64_t state; + uint64_t stream; +}; +} // namespace cuopt