diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 213e38e5e..308c7087b 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -147,6 +147,7 @@ int run_single_file(std::string file_path, int num_cpu_threads, bool write_log_file, bool log_to_console, + int reliability_branching, double time_limit) { const raft::handle_t handle_{}; @@ -196,7 +197,6 @@ int run_single_file(std::string file_path, } } } - settings.time_limit = time_limit; settings.heuristics_only = heuristics_only; settings.num_cpu_threads = num_cpu_threads; @@ -204,6 +204,7 @@ int run_single_file(std::string file_path, settings.tolerances.relative_tolerance = 1e-12; settings.tolerances.absolute_tolerance = 1e-6; settings.presolve = true; + settings.reliability_branching = reliability_branching; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; auto start_run_solver = std::chrono::high_resolution_clock::now(); @@ -256,6 +257,7 @@ void run_single_file_mp(std::string file_path, int num_cpu_threads, bool write_log_file, bool log_to_console, + int reliability_branching, double time_limit) { std::cout << "running file " << file_path << " on gpu : " << device << std::endl; @@ -271,6 +273,7 @@ void run_single_file_mp(std::string file_path, num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); // this is a bad design to communicate the result but better than adding complexity of IPC or // pipes @@ -354,6 +357,11 @@ int main(int argc, char* argv[]) .help("track allocations (t/f)") .default_value(std::string("f")); + program.add_argument("--reliability-branching") + .help("reliability branching: -1 (automatic), 0 (disable) or k > 0 (use k)") + .scan<'i', int>() + .default_value(-1); + // Parse arguments try { program.parse_args(argc, argv); @@ -376,12 +384,13 @@ int main(int argc, char* argv[]) std::string result_file; int batch_num = -1; - bool heuristics_only = program.get("--heuristics-only")[0] == 't'; - int num_cpu_threads = program.get("--num-cpu-threads"); - bool write_log_file = program.get("--write-log-file")[0] == 't'; - bool log_to_console = program.get("--log-to-console")[0] == 't'; - double memory_limit = program.get("--memory-limit"); - bool track_allocations = program.get("--track-allocations")[0] == 't'; + bool heuristics_only = program.get("--heuristics-only")[0] == 't'; + int num_cpu_threads = program.get("--num-cpu-threads"); + bool write_log_file = program.get("--write-log-file")[0] == 't'; + bool log_to_console = program.get("--log-to-console")[0] == 't'; + double memory_limit = program.get("--memory-limit"); + bool track_allocations = program.get("--track-allocations")[0] == 't'; + int reliability_branching = program.get("--reliability-branching"); if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } @@ -469,6 +478,7 @@ int main(int argc, char* argv[]) num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); } else if (sys_pid < 0) { std::cerr << "Fork failed!" << std::endl; @@ -509,6 +519,7 @@ int main(int argc, char* argv[]) num_cpu_threads, write_log_file, log_to_console, + reliability_branching, time_limit); } diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 7caf7aeeb..c52c22c85 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -57,6 +57,7 @@ #define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" #define CUOPT_MIP_SCALING "mip_scaling" #define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" #define CUOPT_MIP_CUT_PASSES "mip_cut_passes" #define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" #define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 863e5d66d..c5c26884f 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -86,6 +86,7 @@ class mip_solver_settings_t { f_t time_limit = std::numeric_limits::infinity(); i_t node_limit = std::numeric_limits::max(); bool heuristics_only = false; + i_t reliability_branching = -1; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound i_t max_cut_passes = 10; // number of cut passes to make i_t mir_cuts = -1; diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 8eca3ba8a..d9783053f 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -223,6 +223,9 @@ class basis_update_mpf_t { reset_stats(); } + basis_update_mpf_t(const basis_update_mpf_t& other) = default; + basis_update_mpf_t& operator=(const basis_update_mpf_t& other) = default; + void print_stats() const { i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; @@ -381,6 +384,8 @@ class basis_update_mpf_t { std::vector& nonbasic_list, std::vector& vstatus); + void set_refactor_frequency(i_t new_frequency) { refactor_frequency_ = new_frequency; } + private: void clear() { diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index d35415904..c6e8b0d17 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -201,26 +201,26 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } #ifdef SHOW_DIVING_TYPE -inline char feasible_solution_symbol(bnb_worker_type_t type) +inline char feasible_solution_symbol(search_strategy_t strategy) { - switch (type) { - case bnb_worker_type_t::BEST_FIRST: return 'B'; - case bnb_worker_type_t::COEFFICIENT_DIVING: return 'C'; - case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'L'; - case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'P'; - case bnb_worker_type_t::GUIDED_DIVING: return 'G'; + switch (strategy) { + case search_strategy_t::BEST_FIRST: return 'B'; + case search_strategy_t::COEFFICIENT_DIVING: return 'C'; + case search_strategy_t::LINE_SEARCH_DIVING: return 'L'; + case search_strategy_t::PSEUDOCOST_DIVING: return 'P'; + case search_strategy_t::GUIDED_DIVING: return 'G'; default: return 'U'; } } #else -inline char feasible_solution_symbol(bnb_worker_type_t type) +inline char feasible_solution_symbol(search_strategy_t strategy) { - switch (type) { - case bnb_worker_type_t::BEST_FIRST: return 'B'; - case bnb_worker_type_t::COEFFICIENT_DIVING: return 'D'; - case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'D'; - case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'D'; - case bnb_worker_type_t::GUIDED_DIVING: return 'D'; + switch (strategy) { + case search_strategy_t::BEST_FIRST: return 'B'; + case search_strategy_t::COEFFICIENT_DIVING: return 'D'; + case search_strategy_t::LINE_SEARCH_DIVING: return 'D'; + case search_strategy_t::PSEUDOCOST_DIVING: return 'D'; + case search_strategy_t::GUIDED_DIVING: return 'D'; default: return 'U'; } } @@ -273,7 +273,8 @@ branch_and_bound_t::branch_and_bound_t( } #endif - upper_bound_ = inf; + upper_bound_ = inf; + root_objective_ = std::numeric_limits::quiet_NaN(); } template @@ -282,18 +283,21 @@ f_t branch_and_bound_t::get_lower_bound() f_t lower_bound = lower_bound_ceiling_.load(); f_t heap_lower_bound = node_queue_.get_lower_bound(); lower_bound = std::min(heap_lower_bound, lower_bound); + lower_bound = std::min(worker_pool_.get_lower_bound(), lower_bound); - for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { - lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); + if (std::isfinite(lower_bound)) { + return lower_bound; + } else if (std::isfinite(root_objective_)) { + return root_objective_; + } else { + return -inf; } - - return std::isfinite(lower_bound) ? lower_bound : -inf; } template void branch_and_bound_t::report_heuristic(f_t obj) { - if (is_running) { + if (is_running_) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(user_obj, user_lower); @@ -671,7 +675,7 @@ template void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - bnb_worker_type_t thread_type) + search_strategy_t thread_type) { bool send_solution = false; @@ -720,91 +724,64 @@ template branch_variable_t branch_and_bound_t::variable_selection( mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - bnb_worker_type_t type) + branch_and_bound_worker_t* worker) { logger_t log; log.log = false; i_t branch_var = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; std::vector current_incumbent; + std::vector& solution = worker->leaf_solution.x; + + switch (worker->search_strategy) { + case search_strategy_t::BEST_FIRST: + + if (settings_.reliability_branching != 0) { + branch_var = pc_.reliable_variable_selection(node_ptr, + fractional, + solution, + settings_, + var_types_, + worker, + exploration_stats_, + upper_bound_, + worker_pool_.num_idle_workers(), + log); + } else { + branch_var = pc_.variable_selection(fractional, solution, log); + } - // If there is no incumbent, use pseudocost diving instead of guided diving - if (upper_bound_ == inf && type == bnb_worker_type_t::GUIDED_DIVING) { - type = bnb_worker_type_t::PSEUDOCOST_DIVING; - } + round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); - switch (type) { - case bnb_worker_type_t::BEST_FIRST: - branch_var = pc_.variable_selection(fractional, solution, log); - round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); return {branch_var, round_dir}; - case bnb_worker_type_t::COEFFICIENT_DIVING: + case search_strategy_t::COEFFICIENT_DIVING: return coefficient_diving( original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); - case bnb_worker_type_t::LINE_SEARCH_DIVING: + case search_strategy_t::LINE_SEARCH_DIVING: return line_search_diving(fractional, solution, root_relax_soln_.x, log); - case bnb_worker_type_t::PSEUDOCOST_DIVING: + case search_strategy_t::PSEUDOCOST_DIVING: return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); - case bnb_worker_type_t::GUIDED_DIVING: + case search_strategy_t::GUIDED_DIVING: mutex_upper_.lock(); current_incumbent = incumbent_.x; mutex_upper_.unlock(); return guided_diving(pc_, fractional, solution, current_incumbent, log); default: - log.debug("Unknown variable selection method: %d\n", type); + log.debug("Unknown variable selection method: %d\n", worker->search_strategy); return {-1, rounding_direction_t::NONE}; } } -template -void branch_and_bound_t::initialize_diving_heuristics_settings( - std::vector& diving_strategies) -{ - diving_strategies.reserve(4); - - if (settings_.diving_settings.pseudocost_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING); - } - - if (settings_.diving_settings.line_search_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING); - } - - if (settings_.diving_settings.guided_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING); - } - - if (settings_.diving_settings.coefficient_diving != 0) { - diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING); - calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); - } - - if (diving_strategies.empty()) { - settings_.log.printf("Warning: All diving heuristics are disabled!\n"); - } -} - template dual::status_t branch_and_bound_t::solve_node_lp( mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - bnb_worker_type_t thread_type, - bool recompute_bounds_and_basis, - const std::vector& root_lower, - const std::vector& root_upper, - bnb_stats_t& stats, + branch_and_bound_worker_t* worker, + branch_and_bound_stats_t& stats, logger_t& log) { #ifdef DEBUG_BRANCHING @@ -843,7 +820,7 @@ dual::status_t branch_and_bound_t::solve_node_lp( #endif std::vector& leaf_vstatus = node_ptr->vstatus; - assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(leaf_vstatus.size() == worker->leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); @@ -852,10 +829,10 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; - if (thread_type != bnb_worker_type_t::BEST_FIRST) { - i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + if (worker->search_strategy != search_strategy_t::BEST_FIRST) { + int64_t bnb_lp_iters = exploration_stats_.total_lp_iters; f_t factor = settings_.diving_settings.iteration_limit_factor; - i_t max_iter = factor * bnb_lp_iters; + int64_t max_iter = factor * bnb_lp_iters; lp_settings.iteration_limit = max_iter - stats.total_lp_iters; if (lp_settings.iteration_limit <= 0) { return dual::status_t::ITERATION_LIMIT; } } @@ -882,24 +859,9 @@ dual::status_t branch_and_bound_t::solve_node_lp( node_ptr->vstatus[node_ptr->branch_var]); #endif - // Reset the bound_changed markers - std::vector bounds_changed(original_lp_.num_cols, false); - - // Set the correct bounds for the leaf problem - if (recompute_bounds_and_basis) { - leaf_problem.lower = root_lower; - leaf_problem.upper = root_upper; - node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - - } else { - node_ptr->update_branched_variable_bounds( - leaf_problem.lower, leaf_problem.upper, bounds_changed); - } - - bool feasible = node_presolver.bounds_strengthening( - lp_settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); - + bool feasible = worker->set_lp_variable_bounds(node_ptr, settings_); dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + worker->leaf_edge_norms = edge_norms_; if (feasible) { i_t node_iter = 0; @@ -907,29 +869,29 @@ dual::status_t branch_and_bound_t::solve_node_lp( lp_status = dual_phase2_with_advanced_basis(2, 0, - recompute_bounds_and_basis, + worker->recompute_basis, lp_start_time, - leaf_problem, + worker->leaf_problem, lp_settings, leaf_vstatus, - basis_factors, - basic_list, - nonbasic_list, - leaf_solution, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + worker->leaf_solution, node_iter, - leaf_edge_norms); + worker->leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { log.debug("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); - lp_status_t second_status = solve_linear_program_with_advanced_basis(leaf_problem, + lp_status_t second_status = solve_linear_program_with_advanced_basis(worker->leaf_problem, lp_start_time, lp_settings, - leaf_solution, - basis_factors, - basic_list, - nonbasic_list, + worker->leaf_solution, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, leaf_vstatus, - leaf_edge_norms); + worker->leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } @@ -944,20 +906,18 @@ dual::status_t branch_and_bound_t::solve_node_lp( return lp_status; } - template std::pair branch_and_bound_t::update_tree( mip_node_t* node_ptr, search_tree_t& search_tree, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - bnb_worker_type_t thread_type, + branch_and_bound_worker_t* worker, dual::status_t lp_status, logger_t& log) { const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; std::vector& leaf_vstatus = node_ptr->vstatus; + lp_problem_t& leaf_problem = worker->leaf_problem; + lp_solution_t& leaf_solution = worker->leaf_solution; if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch @@ -997,12 +957,12 @@ std::pair branch_and_bound_t::upd } #endif - f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); - node_ptr->lower_bound = leaf_objective; + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); pc_.update_pseudo_costs(node_ptr, leaf_objective); + node_ptr->lower_bound = leaf_objective; - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { if (settings_.node_processed_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); @@ -1012,15 +972,15 @@ std::pair branch_and_bound_t::upd if (leaf_num_fractional == 0) { // Found a integer feasible solution - add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); + add_feasible_solution( + leaf_objective, leaf_solution.x, node_ptr->depth, worker->search_strategy); search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); search_tree.update(node_ptr, node_status_t::INTEGER_FEASIBLE); return {node_status_t::INTEGER_FEASIBLE, rounding_direction_t::NONE}; } else if (leaf_objective <= upper_bound_ + abs_fathom_tol) { // Choose fractional variable to branch on - auto [branch_var, round_dir] = - variable_selection(node_ptr, leaf_fractional, leaf_solution.x, thread_type); + auto [branch_var, round_dir] = variable_selection(node_ptr, leaf_fractional, worker); assert(leaf_vstatus.size() == leaf_problem.num_cols); assert(branch_var >= 0); @@ -1029,7 +989,7 @@ std::pair branch_and_bound_t::upd // Note that the exploration thread is the only one that can insert new nodes into the heap, // and thus, we only need to calculate the objective estimate here (it is used for // sorting the nodes for diving). - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { logger_t pc_log; pc_log.log = false; node_ptr->objective_estimate = @@ -1052,7 +1012,7 @@ std::pair branch_and_bound_t::upd return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } } else { - if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (worker->search_strategy == search_strategy_t::BEST_FIRST) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " @@ -1070,136 +1030,19 @@ std::pair branch_and_bound_t::upd } template -void branch_and_bound_t::exploration_ramp_up(mip_node_t* node, - i_t initial_heap_size) +void branch_and_bound_t::plunge_with(branch_and_bound_worker_t* worker) { - if (solver_status_ != mip_status_t::UNSET) { return; } - - // Note that we do not know which thread will execute the - // `exploration_ramp_up` task, so we allow to any thread - // to repair the heuristic solution. - repair_heuristic_solutions(); - - f_t lower_bound = node->lower_bound; - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = upper_bound - lower_bound; - - if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree_.graphviz_node(settings_.log, node, "cutoff", node->lower_bound); - search_tree_.update(node, node_status_t::FATHOMED); - --exploration_stats_.nodes_unexplored; - return; - } - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 10 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - (time_since_last_log >= 1)) || - (time_since_last_log > 30) || now > settings_.time_limit) { - bool should_report = should_report_.exchange(false); - - if (should_report) { - report(' ', upper_bound, root_objective_, node->depth, node->integer_infeasible); - exploration_stats_.nodes_since_last_log = 0; - exploration_stats_.last_log = tic(); - should_report_ = true; - } - } - - if (now > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - return; - } - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; - - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - bnb_worker_type_t::BEST_FIRST, - true, - original_lp_.lower, - original_lp_.upper, - exploration_stats_, - settings_.log); - if (lp_status == dual::status_t::TIME_LIMIT) { - solver_status_ = mip_status_t::TIME_LIMIT; - return; - } - - ++exploration_stats_.nodes_since_last_log; - ++exploration_stats_.nodes_explored; - --exploration_stats_.nodes_unexplored; - - auto [node_status, round_dir] = update_tree(node, - search_tree_, - leaf_problem, - leaf_solution, - leaf_edge_norms, - bnb_worker_type_t::BEST_FIRST, - lp_status, - settings_.log); - - if (node_status == node_status_t::HAS_CHILDREN) { - exploration_stats_.nodes_unexplored += 2; - - // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase - if (exploration_stats_.nodes_unexplored < initial_heap_size) { -#pragma omp task - exploration_ramp_up(node->get_down_child(), initial_heap_size); - -#pragma omp task - exploration_ramp_up(node->get_up_child(), initial_heap_size); - - } else { - // We've generated enough nodes, push further nodes onto the heap - node_queue_.push(node->get_down_child()); - node_queue_.push(node->get_up_child()); - } - } -} - -template -void branch_and_bound_t::plunge_from(i_t task_id, - mip_node_t* start_node, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) -{ - bool recompute_bounds_and_basis = true; std::deque*> stack; - stack.push_front(start_node); - - while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { - if (task_id == 0) { repair_heuristic_solutions(); } + stack.push_front(worker->start_node); + worker->recompute_basis = true; + worker->recompute_bounds = true; + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); f_t lower_bound = node_ptr->lower_bound; f_t upper_bound = upper_bound_; - f_t abs_gap = upper_bound - lower_bound; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); // This is based on three assumptions: @@ -1208,58 +1051,28 @@ void branch_and_bound_t::plunge_from(i_t task_id, // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - assert(task_id < local_lower_bounds_.size()); - local_lower_bounds_[task_id] = lower_bound; + worker->lower_bound = lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree_.update(node_ptr, node_status_t::FATHOMED); - recompute_bounds_and_basis = true; + worker->recompute_basis = true; + worker->recompute_bounds = true; --exploration_stats_.nodes_unexplored; continue; } - f_t now = toc(exploration_stats_.start_time); - - if (task_id == 0) { - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 1000 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - report(' ', upper_bound, get_lower_bound(), node_ptr->depth, node_ptr->integer_infeasible); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } - } - - if (now > settings_.time_limit) { + if (toc(exploration_stats_.start_time) > settings_.time_limit) { solver_status_ = mip_status_t::TIME_LIMIT; break; } + if (exploration_stats_.nodes_explored >= settings_.node_limit) { solver_status_ = mip_status_t::NODE_LIMIT; break; } - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node_ptr, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - bnb_worker_type_t::BEST_FIRST, - recompute_bounds_and_basis, - original_lp_.lower, - original_lp_.upper, - exploration_stats_, - settings_.log); + dual::status_t lp_status = solve_node_lp(node_ptr, worker, exploration_stats_, settings_.log); if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; @@ -1272,16 +1085,11 @@ void branch_and_bound_t::plunge_from(i_t task_id, ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - auto [node_status, round_dir] = update_tree(node_ptr, - search_tree_, - leaf_problem, - leaf_solution, - leaf_edge_norms, - bnb_worker_type_t::BEST_FIRST, - lp_status, - settings_.log); + auto [node_status, round_dir] = + update_tree(node_ptr, search_tree_, worker, lp_status, settings_.log); - recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { // The stack should only contain the children of the current parent. @@ -1297,143 +1105,73 @@ void branch_and_bound_t::plunge_from(i_t task_id, exploration_stats_.nodes_unexplored += 2; if (round_dir == rounding_direction_t::UP) { - stack.push_front(node_ptr->get_down_child()); + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_down_child()); + } else { + stack.push_front(node_ptr->get_down_child()); + } + stack.push_front(node_ptr->get_up_child()); } else { - stack.push_front(node_ptr->get_up_child()); - stack.push_front(node_ptr->get_down_child()); - } - } - } -} - -template -void branch_and_bound_t::best_first_thread(i_t task_id) -{ - f_t lower_bound = -inf; - f_t abs_gap = inf; - f_t rel_gap = inf; - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + } - while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && - rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { - // In the current implementation, we are use the active number of subtree to decide - // when to stop the execution. We need to increment the counter at the same - // time as we pop a node from the queue to avoid some threads exiting - // the main loop thinking that the solver has already finished. - // This will be not needed in the master-worker model. - node_queue_.lock(); - // If there any node left in the heap, we pop the top node and explore it. - std::optional*> start_node = node_queue_.pop_best_first(); - if (start_node.has_value()) { active_subtrees_++; }; - node_queue_.unlock(); - - if (start_node.has_value()) { - if (upper_bound_ < start_node.value()->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree_.graphviz_node( - settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); - search_tree_.update(start_node.value(), node_status_t::FATHOMED); - active_subtrees_--; - continue; + stack.push_front(node_ptr->get_down_child()); } - - // Best-first search with plunging - plunge_from(task_id, - start_node.value(), - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); - - active_subtrees_--; } - - lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; - rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } - is_running = false; - - // Check if it is the last thread that exited the loop and no - // timeout or numerical error has happen. - if (solver_status_ == mip_status_t::UNSET) { - if (active_subtrees_ > 0) { local_lower_bounds_[task_id] = inf; } + if (settings_.num_threads > 1) { + worker_pool_.return_worker_to_pool(worker); + active_workers_per_strategy_[BEST_FIRST]--; } } template -void branch_and_bound_t::dive_from(mip_node_t& start_node, - const std::vector& start_lower, - const std::vector& start_upper, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bnb_worker_type_t diving_type) +void branch_and_bound_t::dive_with(branch_and_bound_worker_t* worker) { logger_t log; log.log = false; - const i_t diving_node_limit = settings_.diving_settings.node_limit; - const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; - bool recompute_bounds_and_basis = true; - search_tree_t dive_tree(std::move(start_node)); + search_strategy_t search_strategy = worker->search_strategy; + const i_t diving_node_limit = settings_.diving_settings.node_limit; + const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; + + worker->recompute_basis = true; + worker->recompute_bounds = true; + + search_tree_t dive_tree(std::move(*worker->start_node)); std::deque*> stack; stack.push_front(&dive_tree.root); - bnb_stats_t dive_stats; + branch_and_bound_stats_t dive_stats; dive_stats.total_lp_iters = 0; dive_stats.total_lp_solve_time = 0; dive_stats.nodes_explored = 0; dive_stats.nodes_unexplored = 1; - while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running_) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); - f_t lower_bound = node_ptr->lower_bound; - f_t upper_bound = upper_bound_; - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t lower_bound = node_ptr->lower_bound; + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + worker->lower_bound = lower_bound; if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds_and_basis = true; + worker->recompute_basis = true; + worker->recompute_bounds = true; continue; } if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } if (dive_stats.nodes_explored > diving_node_limit) { break; } - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = solve_node_lp(node_ptr, - leaf_problem, - leaf_solution, - leaf_edge_norms, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - diving_type, - recompute_bounds_and_basis, - start_lower, - start_upper, - dive_stats, - log); + dual::status_t lp_status = solve_node_lp(node_ptr, worker, dive_stats, log); if (lp_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; @@ -1444,15 +1182,10 @@ void branch_and_bound_t::dive_from(mip_node_t& start_node, ++dive_stats.nodes_explored; - auto [node_status, round_dir] = update_tree(node_ptr, - dive_tree, - leaf_problem, - leaf_solution, - leaf_edge_norms, - diving_type, - lp_status, - log); - recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + auto [node_status, round_dir] = update_tree(node_ptr, dive_tree, worker, lp_status, log); + + worker->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; if (node_status == node_status_t::HAS_CHILDREN) { if (round_dir == rounding_direction_t::UP) { @@ -1470,68 +1203,207 @@ void branch_and_bound_t::dive_from(mip_node_t& start_node, stack.pop_back(); } } + + worker_pool_.return_worker_to_pool(worker); + active_workers_per_strategy_[search_strategy]--; } template -void branch_and_bound_t::diving_thread(bnb_worker_type_t diving_type) +void branch_and_bound_t::run_scheduler() { - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); - std::vector bounds_changed(original_lp_.num_cols, false); + diving_heuristics_settings_t diving_settings = settings_.diving_settings; + const i_t num_workers = 2 * settings_.num_threads; - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; + if (!std::isfinite(upper_bound_)) { diving_settings.guided_diving = false; } + std::vector strategies = get_search_strategies(diving_settings); + std::array max_num_workers_per_type = + get_max_workers(num_workers, strategies); + + worker_pool_.init(num_workers, original_lp_, Arow_, var_types_, settings_); + active_workers_per_strategy_.fill(0); + +#ifdef CUOPT_LOG_DEBUG + for (auto strategy : strategies) { + settings_.log.debug("%c%d: max num of workers = %d", + feasible_solution_symbol(strategy), + strategy, + max_num_workers_per_type[strategy]); + } +#endif + + f_t lower_bound = get_lower_bound(); + f_t abs_gap = upper_bound_ - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + i_t last_node_depth = 0; + i_t last_int_infeas = 0; + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { + bool launched_any_task = false; + lower_bound = get_lower_bound(); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + repair_heuristic_solutions(); + + // If the guided diving was disabled previously due to the lack of an incumbent solution, + // re-enable as soon as a new incumbent is found. + if (settings_.diving_settings.guided_diving != diving_settings.guided_diving) { + if (std::isfinite(upper_bound_)) { + diving_settings.guided_diving = settings_.diving_settings.guided_diving; + strategies = get_search_strategies(diving_settings); + max_num_workers_per_type = get_max_workers(num_workers, strategies); + +#ifdef CUOPT_LOG_DEBUG + for (auto type : strategies) { + settings_.log.debug("%c%d: max num of workers = %d", + feasible_solution_symbol(type), + type, + max_num_workers_per_type[type]); + } +#endif + } + } - std::vector start_lower; - std::vector start_upper; - bool reset_starting_bounds = true; - - while (solver_status_ == mip_status_t::UNSET && is_running && - (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { - if (reset_starting_bounds) { - start_lower = original_lp_.lower; - start_upper = original_lp_.upper; - std::fill(bounds_changed.begin(), bounds_changed.end(), false); - reset_starting_bounds = false; + f_t now = toc(exploration_stats_.start_time); + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + i_t queue_size = node_queue_.best_first_queue_size(); + i_t depth = queue_size > 0 ? node_queue_.bfs_top()->depth : last_node_depth; + i_t int_infeas = queue_size > 0 ? node_queue_.bfs_top()->integer_infeasible : last_int_infeas; + report(' ', upper_bound_, lower_bound, depth, int_infeas); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; } - // In the current implementation, multiple threads can pop the nodes - // from the queue, so we need to initialize the lower and upper bound here - // to avoid other thread fathoming the node (i.e., deleting) before we can read - // the variable bounds from the tree. - // This will be not needed in the master-worker model. - node_queue_.lock(); - std::optional*> node_ptr = node_queue_.pop_diving(); - std::optional> start_node = std::nullopt; - - if (node_ptr.has_value()) { - node_ptr.value()->get_variable_bounds(start_lower, start_upper, bounds_changed); - start_node = node_ptr.value()->detach_copy(); + if (now > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; } - node_queue_.unlock(); - if (start_node.has_value()) { - reset_starting_bounds = true; + for (auto strategy : strategies) { + if (active_workers_per_strategy_[strategy] >= max_num_workers_per_type[strategy]) { + continue; + } + + // Get an idle worker. + branch_and_bound_worker_t* worker = worker_pool_.get_idle_worker(); + if (worker == nullptr) { break; } + + if (strategy == BEST_FIRST) { + // If there any node left in the heap, we pop the top node and explore it. + std::optional*> start_node = node_queue_.pop_best_first(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); + continue; + } - if (upper_bound_ < start_node->lower_bound) { continue; } - bool is_feasible = - node_presolver.bounds_strengthening(settings_, bounds_changed, start_lower, start_upper); - if (!is_feasible) { continue; } - - dive_from(start_node.value(), - start_lower, - start_upper, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list, - diving_type); + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + worker->init_best_first(start_node.value(), original_lp_); + last_node_depth = start_node.value()->depth; + last_int_infeas = start_node.value()->integer_infeasible; + active_workers_per_strategy_[strategy]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + plunge_with(worker); + + } else { + std::optional*> start_node = node_queue_.pop_diving(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound || + start_node.value()->depth < diving_settings.min_node_depth) { + continue; + } + + bool is_feasible = + worker->init_diving(start_node.value(), strategy, original_lp_, settings_); + if (!is_feasible) { continue; } + + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + active_workers_per_strategy_[strategy]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + dive_with(worker); + } } + + // If no new task was launched in this iteration, suspend temporarily the + // execution of the scheduler. As of 8/Jan/2026, GCC does not + // implement taskyield, but LLVM does. + if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + } +} + +template +void branch_and_bound_t::single_threaded_solve() +{ + branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); + + f_t lower_bound = get_lower_bound(); + f_t abs_gap = upper_bound_ - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { + bool launched_any_task = false; + lower_bound = get_lower_bound(); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + repair_heuristic_solutions(); + + f_t now = toc(exploration_stats_.start_time); + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + i_t depth = node_queue_.bfs_top()->depth; + i_t int_infeas = node_queue_.bfs_top()->integer_infeasible; + report(' ', upper_bound_, lower_bound, depth, int_infeas); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; + } + + if (now > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } + + // If there any node left in the heap, we pop the top node and explore it. + std::optional*> start_node = node_queue_.pop_best_first(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); + continue; + } + + worker.init_best_first(start_node.value(), original_lp_); + plunge_with(&worker); } } @@ -1675,7 +1547,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( solver_name.c_str()); settings_.log.printf("Root relaxation objective %+.8e\n", user_objective); } else { - settings_.log.printf("Root relaxation returned status: %s\n", + settings_.log.printf("Root relaxation returned: %s\n", lp_status_to_string(root_status).c_str()); } @@ -1692,14 +1564,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut log.log = false; log.log_prefix = settings_.log.log_prefix; solver_status_ = mip_status_t::UNSET; - is_running = false; + is_running_ = false; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; original_lp_.A.to_compressed_row(Arow_); - std::vector diving_strategies; - initialize_diving_heuristics_settings(diving_strategies); - if (guess_.size() != 0) { std::vector crushed_guess; crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess); @@ -1728,6 +1597,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector nonbasic_list; basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t root_status; + if (!enable_concurrent_lp_root_solve()) { // RINS/SUBMIP path settings_.log.printf("\nSolving LP root relaxation with dual simplex\n"); @@ -1775,6 +1645,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_final_solution(solution, -inf); return solver_status_; } + if (root_status == lp_status_t::NUMERICAL_ISSUES) { solver_status_ = mip_status_t::NUMERICAL; set_final_solution(solution, -inf); @@ -1785,7 +1656,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -1806,12 +1676,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); cut_info_t cut_info; + if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; } - is_running = true; + is_running_ = true; lower_bound_ceiling_ = inf; if (num_fractional != 0 && settings_.max_cut_passes > 0) { @@ -2026,8 +1897,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); - f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); remove_cuts(original_lp_, @@ -2171,58 +2040,39 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, original_lp_, log); + node_queue_.push(search_tree_.root.get_down_child()); + node_queue_.push(search_tree_.root.get_up_child()); - settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n\n", - settings_.num_threads, - settings_.num_bfs_workers, - settings_.num_threads - settings_.num_bfs_workers); + settings_.log.printf("Exploring the B&B tree using %d threads\n\n", settings_.num_threads); exploration_stats_.nodes_explored = 0; exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); - active_subtrees_ = 0; - lower_bound_ceiling_ = inf; - should_report_ = true; + min_node_queue_size_ = 2 * settings_.num_threads; + + if (settings_.diving_settings.coefficient_diving != 0) { + calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); + } settings_.log.printf( " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | " "Gap " "| Time |\n"); + + if (settings_.num_threads > 1) { #pragma omp parallel num_threads(settings_.num_threads) - { -#pragma omp master { - auto down_child = search_tree_.root.get_down_child(); - auto up_child = search_tree_.root.get_up_child(); - i_t initial_size = 2 * settings_.num_threads; - const i_t num_strategies = diving_strategies.size(); - -#pragma omp taskgroup - { -#pragma omp task - exploration_ramp_up(down_child, initial_size); - -#pragma omp task - exploration_ramp_up(up_child, initial_size); - } - - for (i_t i = 0; i < settings_.num_bfs_workers; i++) { -#pragma omp task - best_first_thread(i); - } - - if (!diving_strategies.empty()) { - for (i_t k = 0; k < settings_.diving_settings.num_diving_workers; k++) { - const bnb_worker_type_t diving_type = diving_strategies[k % num_strategies]; -#pragma omp task - diving_thread(diving_type); - } - } +#pragma omp master + run_scheduler(); } + + } else { + single_threaded_solve(); } - is_running = false; + is_running_ = false; + f_t lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() : search_tree_.root.lower_bound; set_final_solution(solution, lower_bound); diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 6db45e153..1d6947681 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -37,38 +38,9 @@ enum class mip_status_t { UNSET = 6, // The status is not set }; -// Indicate the search and variable selection algorithms used by each thread -// in B&B (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum class bnb_worker_type_t { - BEST_FIRST = 0, // Best-First + Plunging. - PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) - LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) - GUIDED_DIVING = 3, // Guided diving (9.2.3). If no incumbent is found yet, use pseudocost diving. - COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) -}; - -template -class bounds_strengthening_t; - template void upper_bound_callback(f_t upper_bound); -template -struct bnb_stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; -}; - template class branch_and_bound_t { public: @@ -153,9 +125,6 @@ class branch_and_bound_t { std::vector var_up_locks_; std::vector var_down_locks_; - // Local lower bounds for each thread - std::vector> local_lower_bounds_; - // Mutex for the original LP // The heuristics threads look at the original LP. But the main thread modifies the // size of the original LP by adding slacks for cuts. Heuristic threads should lock @@ -173,7 +142,7 @@ class branch_and_bound_t { mip_solution_t incumbent_; // Structure with the general info of the solver. - bnb_stats_t exploration_stats_; + branch_and_bound_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -200,14 +169,21 @@ class branch_and_bound_t { // Search tree search_tree_t search_tree_; - // Count the number of subtrees that are currently being explored. - omp_atomic_t active_subtrees_; + // Count the number of workers per type that either are being executed or + // are waiting to be executed. + std::array, num_search_strategies> active_workers_per_strategy_; + + // Worker pool + branch_and_bound_worker_pool_t worker_pool_; // Global status of the solver. omp_atomic_t solver_status_; - omp_atomic_t is_running{false}; + omp_atomic_t is_running_{false}; - omp_atomic_t should_report_; + // Minimum number of node in the queue. When the queue size is less than + // this variable, the nodes are added directly to the queue instead of + // the local stack. This also determines the end of the ramp-up phase. + i_t min_node_queue_size_; // In case, a best-first thread encounters a numerical issue when solving a node, // its blocks the progression of the lower bound. @@ -230,82 +206,48 @@ class branch_and_bound_t { void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - bnb_worker_type_t thread_type); + search_strategy_t thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - // Initialize diving heuristics settings - void initialize_diving_heuristics_settings(std::vector& diving_strategies); - - // Ramp-up phase of the solver, where we greedily expand the tree until - // there is enough unexplored nodes. This is done recursively using OpenMP tasks. - void exploration_ramp_up(mip_node_t* node, i_t initial_heap_size); - // We use best-first to pick the `start_node` and then perform a depth-first search // from this node (i.e., a plunge). It can only backtrack to a sibling node. // Unexplored nodes in the subtree are inserted back into the global heap. - void plunge_from(i_t task_id, - mip_node_t* start_node, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); - - // Each "main" thread pops a node from the global heap and then performs a plunge - // (i.e., a shallow dive) into the subtree determined by the node. - void best_first_thread(i_t task_id); + void plunge_with(branch_and_bound_worker_t* worker); // Perform a deep dive in the subtree determined by the `start_node` in order // to find integer feasible solutions. - void dive_from(mip_node_t& start_node, - const std::vector& start_lower, - const std::vector& start_upper, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - bnb_worker_type_t diving_type); - - // Each diving thread pops the first node from the dive queue and then performs - // a deep dive into the subtree determined by the node. - void diving_thread(bnb_worker_type_t diving_type); + void dive_with(branch_and_bound_worker_t* worker); + + // Run the scheduler whose will schedule and manage + // all the other workers. + void run_scheduler(); + + // Run the branch-and-bound algorithm in single threaded mode. + // This disable all diving heuristics. + void single_threaded_solve(); // Solve the LP relaxation of a leaf node dual::status_t solve_node_lp(mip_node_t* node_ptr, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - bnb_worker_type_t thread_type, - bool recompute_bounds_and_basis, - const std::vector& root_lower, - const std::vector& root_upper, - bnb_stats_t& stats, + branch_and_bound_worker_t* worker, + branch_and_bound_stats_t& stats, logger_t& log); // Update the tree based on the LP relaxation. Returns the status // of the node and, if appropriated, the preferred rounding direction // when visiting the children. - std::pair update_tree(mip_node_t* node_ptr, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - lp_solution_t& leaf_solution, - std::vector& leaf_edge_norms, - bnb_worker_type_t thread_type, - dual::status_t lp_status, - logger_t& log); + std::pair update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + branch_and_bound_worker_t* worker, + dual::status_t lp_status, + logger_t& log); // Selects the variable to branch on. branch_variable_t variable_selection(mip_node_t* node_ptr, const std::vector& fractional, - const std::vector& solution, - bnb_worker_type_t type); + branch_and_bound_worker_t* worker); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/branch_and_bound_worker.hpp b/cpp/src/dual_simplex/branch_and_bound_worker.hpp new file mode 100644 index 000000000..99a59a871 --- /dev/null +++ b/cpp/src/dual_simplex/branch_and_bound_worker.hpp @@ -0,0 +1,280 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int num_search_strategies = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum search_strategy_t : int { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +template +struct branch_and_bound_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + omp_atomic_t nodes_since_last_log = 0; + omp_atomic_t last_log = 0.0; +}; + +template +class branch_and_bound_worker_t { + public: + const i_t worker_id; + omp_atomic_t search_strategy; + omp_atomic_t is_active; + omp_atomic_t lower_bound; + + lp_problem_t leaf_problem; + lp_solution_t leaf_solution; + std::vector leaf_edge_norms; + + basis_update_mpf_t basis_factors; + std::vector basic_list; + std::vector nonbasic_list; + + bounds_strengthening_t node_presolver; + std::vector bounds_changed; + + std::vector start_lower; + std::vector start_upper; + mip_node_t* start_node; + + pcgenerator_t rng; + + bool recompute_basis = true; + bool recompute_bounds = true; + + branch_and_bound_worker_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + : worker_id(worker_id), + search_strategy(BEST_FIRST), + is_active(false), + lower_bound(-std::numeric_limits::infinity()), + leaf_problem(original_lp), + leaf_solution(original_lp.num_rows, original_lp.num_cols), + basis_factors(original_lp.num_rows, settings.refactor_frequency), + basic_list(original_lp.num_rows), + nonbasic_list(), + node_presolver(leaf_problem, Arow, {}, var_type), + bounds_changed(original_lp.num_cols, false), + rng(settings.random_seed + pcgenerator_t::default_seed + worker_id, + pcgenerator_t::default_stream ^ worker_id) + { + } + + // Set the `start_node` for best-first search. + void init_best_first(mip_node_t* node, const lp_problem_t& original_lp) + { + start_node = node; + start_lower = original_lp.lower; + start_upper = original_lp.upper; + search_strategy = BEST_FIRST; + lower_bound = node->lower_bound; + is_active = true; + } + + // Initialize the worker for diving, setting the `start_node`, `start_lower` and + // `start_upper`. Returns `true` if the starting node is feasible via + // bounds propagation. + bool init_diving(mip_node_t* node, + search_strategy_t type, + const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings) + { + internal_node = node->detach_copy(); + start_node = &internal_node; + + start_lower = original_lp.lower; + start_upper = original_lp.upper; + search_strategy = type; + lower_bound = node->lower_bound; + is_active = true; + + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + node->get_variable_bounds(start_lower, start_upper, bounds_changed); + return node_presolver.bounds_strengthening(settings, bounds_changed, start_lower, start_upper); + } + + // Set the variables bounds for the LP relaxation of the current node. + bool set_lp_variable_bounds(mip_node_t* node_ptr, + const simplex_solver_settings_t& settings) + { + // Reset the bound_changed markers + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + + // Set the correct bounds for the leaf problem + if (recompute_bounds) { + leaf_problem.lower = start_lower; + leaf_problem.upper = start_upper; + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + } else { + node_ptr->update_branched_variable_bounds( + leaf_problem.lower, leaf_problem.upper, bounds_changed); + } + + return node_presolver.bounds_strengthening( + settings, bounds_changed, leaf_problem.lower, leaf_problem.upper); + } + + private: + // For diving, we need to store the full node instead of + // of just a pointer, since it is not stored in the tree anymore. + // To keep the same interface across all worker types, + // this will be used as a temporary storage and + // will be pointed by `start_node`. + // For exploration, this will not be used. + mip_node_t internal_node; +}; + +template +class branch_and_bound_worker_pool_t { + public: + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + { + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = std::make_unique>( + i, original_lp, Arow, var_type, settings); + idle_workers_.push_front(i); + } + + is_initialized = true; + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + branch_and_bound_worker_t* get_idle_worker() + { + std::lock_guard lock(mutex_); + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + return workers_[idx].get(); + } + } + + // Here, we are assuming that the scheduler is the only + // thread that can retrieve/pop an idle worker. + void pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (!idle_workers_.empty()) { + idle_workers_.pop_front(); + num_idle_workers_--; + } + } + + void return_worker_to_pool(branch_and_bound_worker_t* worker) + { + worker->is_active = false; + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bound() + { + f_t lower_bound = std::numeric_limits::infinity(); + + if (is_initialized) { + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->search_strategy == BEST_FIRST && workers_[i]->is_active) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + } + + return lower_bound; + } + + i_t num_idle_workers() { return num_idle_workers_; } + + private: + // Worker pool + std::vector>> workers_; + bool is_initialized = false; + + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; +}; + +template +std::vector get_search_strategies( + diving_heuristics_settings_t settings) +{ + std::vector types; + types.reserve(num_search_strategies); + types.push_back(BEST_FIRST); + if (settings.pseudocost_diving != 0) { types.push_back(PSEUDOCOST_DIVING); } + if (settings.line_search_diving != 0) { types.push_back(LINE_SEARCH_DIVING); } + if (settings.guided_diving != 0) { types.push_back(GUIDED_DIVING); } + if (settings.coefficient_diving != 0) { types.push_back(COEFFICIENT_DIVING); } + return types; +} + +template +std::array get_max_workers( + i_t num_workers, const std::vector& strategies) +{ + std::array max_num_workers; + max_num_workers.fill(0); + + i_t bfs_workers = std::max(strategies.size() == 1 ? num_workers : num_workers / 4, 1); + max_num_workers[BEST_FIRST] = bfs_workers; + + i_t diving_workers = (num_workers - bfs_workers); + i_t m = strategies.size() - 1; + + for (size_t i = 1, k = 0; i < strategies.size(); ++i) { + i_t start = (double)k * diving_workers / m; + i_t end = (double)(k + 1) * diving_workers / m; + max_num_workers[strategies[i]] = end - start; + ++k; + } + + return max_num_workers; +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_heuristics.cpp b/cpp/src/dual_simplex/diving_heuristics.cpp index a56b4cce3..b3e91bc09 100644 --- a/cpp/src/dual_simplex/diving_heuristics.cpp +++ b/cpp/src/dual_simplex/diving_heuristics.cpp @@ -71,9 +71,8 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, const std::vector& root_solution, logger_t& log) { - std::lock_guard lock(pc.mutex); i_t branch_var = -1; - f_t max_score = std::numeric_limits::lowest(); + f_t max_score = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; constexpr f_t eps = 1e-6; @@ -127,6 +126,15 @@ branch_variable_t pseudocost_diving(pseudo_costs_t& pc, } } + // If we cannot choose the variable, then arbitrarily pick the first + // fractional variable and round it down. This only happens when + // there is only one fractional variable and its the pseudocost is + // infinite for both direction. + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + assert(round_dir != rounding_direction_t::NONE); assert(branch_var >= 0); @@ -146,9 +154,8 @@ branch_variable_t guided_diving(pseudo_costs_t& pc, const std::vector& incumbent, logger_t& log) { - std::lock_guard lock(pc.mutex); i_t branch_var = -1; - f_t max_score = std::numeric_limits::lowest(); + f_t max_score = -1; rounding_direction_t round_dir = rounding_direction_t::NONE; constexpr f_t eps = 1e-6; diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 5ee4f49d1..8b2211c1d 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -227,18 +227,26 @@ class mip_node_t { } // This method creates a copy of the current node - // with its parent set to `nullptr` and `depth = 0`. + // with its parent set to `nullptr` // This detaches the node from the tree. mip_node_t detach_copy() const { - mip_node_t copy(lower_bound, vstatus); + mip_node_t copy; + copy.lower_bound = lower_bound; + copy.objective_estimate = objective_estimate; + copy.depth = depth; + copy.node_id = node_id; + copy.integer_infeasible = integer_infeasible; + copy.vstatus = vstatus; copy.branch_var = branch_var; copy.branch_dir = branch_dir; copy.branch_var_lower = branch_var_lower; copy.branch_var_upper = branch_var_upper; copy.fractional_val = fractional_val; - copy.objective_estimate = objective_estimate; - copy.node_id = node_id; + copy.parent = nullptr; + copy.children[0] = nullptr; + copy.children[1] = nullptr; + copy.status = node_status_t::PENDING; return copy; } diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp index 28072795a..b5c9a9ea7 100644 --- a/cpp/src/dual_simplex/node_queue.hpp +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -114,26 +114,29 @@ class node_queue_t { std::optional*> pop_best_first() { + std::lock_guard lock(mutex); auto entry = best_first_heap.pop(); + if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } + return std::nullopt; } std::optional*> pop_diving() { + std::lock_guard lock(mutex); + while (!diving_heap.empty()) { auto entry = diving_heap.pop(); + if (entry.has_value()) { if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } } } + return std::nullopt; } - void lock() { mutex.lock(); } - - void unlock() { mutex.unlock(); } - i_t diving_queue_size() { std::lock_guard lock(mutex); diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 978896887..d104036ea 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -619,7 +619,7 @@ void convert_user_problem(const user_problem_t& user_problem, } constexpr bool run_bounds_strengthening = false; - if (run_bounds_strengthening) { + if constexpr (run_bounds_strengthening) { csr_matrix_t Arow(1, 1, 1); problem.A.to_compressed_row(Arow); diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 015034b77..682bdaa6f 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -88,7 +88,7 @@ void strong_branch_helper(i_t start, } if (branch == 0) { - pc.strong_branch_down[k] = obj - root_obj; + pc.strong_branch_down[k] = std::max(obj - root_obj, 0.0); if (verbose) { settings.log.printf("Thread id %2d remaining %d variable %d branch %d obj %e time %.2f\n", thread_id, @@ -99,7 +99,7 @@ void strong_branch_helper(i_t start, toc(start_time)); } } else { - pc.strong_branch_up[k] = obj - root_obj; + pc.strong_branch_up[k] = std::max(obj - root_obj, 0.0); if (verbose) { settings.log.printf( "Thread id %2d remaining %d variable %d branch %d obj %e change down %e change up %e " @@ -135,6 +135,83 @@ void strong_branch_helper(i_t start, } } +template +f_t trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + omp_atomic_t& total_lp_iter) +{ + lp_problem_t child_problem = original_lp; + child_problem.lower[branch_var] = branch_var_lower; + child_problem.upper[branch_var] = branch_var_upper; + + const bool initialize_basis = false; + simplex_solver_settings_t child_settings = settings; + child_settings.set_log(false); + i_t lp_iter_upper = upper_max_lp_iter; + i_t lp_iter_lower = lower_max_lp_iter; + child_settings.iteration_limit = std::clamp(bnb_lp_iter_per_node, lp_iter_lower, lp_iter_upper); + child_settings.cut_off = upper_bound + settings.dual_tol; + child_settings.inside_mip = 2; + child_settings.scale_columns = false; + + lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); + i_t iter = 0; + std::vector child_vstatus = vstatus; + std::vector child_edge_norms = edge_norms; + std::vector child_basic_list = basic_list; + std::vector child_nonbasic_list = nonbasic_list; + basis_update_mpf_t child_basis_factors = basis_factors; + + // Only refactor the basis if we encounter numerical issues. + child_basis_factors.set_refactor_frequency(upper_max_lp_iter); + + dual::status_t status = dual_phase2_with_advanced_basis(2, + 0, + initialize_basis, + start_time, + child_problem, + child_settings, + child_vstatus, + child_basis_factors, + child_basic_list, + child_nonbasic_list, + solution, + iter, + child_edge_norms); + total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n", + branch_var, + child_problem.lower[branch_var], + child_problem.upper[branch_var], + iter, + dual::status_to_string(status).c_str(), + compute_objective(child_problem, solution.x)); + + if (status == dual::status_t::DUAL_UNBOUNDED) { + // LP was infeasible + return std::numeric_limits::infinity(); + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { + return compute_objective(child_problem, solution.x); + } else { + return std::numeric_limits::quiet_NaN(); + } +} + } // namespace template @@ -350,15 +427,31 @@ void strong_branching(const user_problem_t& original_problem, pc.update_pseudo_costs_from_strong_branching(fractional, root_soln); } +template +f_t pseudo_costs_t::calculate_pseudocost_score(i_t j, + const std::vector& solution, + f_t pseudo_cost_up_avg, + f_t pseudo_cost_down_avg) const +{ + constexpr f_t eps = 1e-6; + i_t num_up = pseudo_cost_num_up[j]; + i_t num_down = pseudo_cost_num_down[j]; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + return std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); +} + template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - std::lock_guard lock(mutex); - const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; + const f_t change_in_obj = std::max(leaf_objective - node_ptr->lower_bound, 0.0); const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) : std::ceil(node_ptr->fractional_val) - node_ptr->fractional_val; + if (node_ptr->branch_dir == rounding_direction_t::DOWN) { pseudo_cost_sum_down[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_down[node_ptr->branch_var]++; @@ -412,13 +505,55 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const std::vector& solution, logger_t& log) { - std::lock_guard lock(mutex); + i_t branch_var = fractional[0]; + f_t max_score = -1; + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; - const i_t num_fractional = fractional.size(); - std::vector pseudo_cost_up(num_fractional); - std::vector pseudo_cost_down(num_fractional); - std::vector score(num_fractional); + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + log.printf("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); + + for (i_t j : fractional) { + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + if (score > max_score) { + max_score = score; + branch_var = j; + } + } + + log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", + branch_var, + solution[branch_var], + max_score); + + return branch_var; +} + +template +i_t pseudo_costs_t::reliable_variable_selection( + mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + branch_and_bound_worker_t* worker, + const branch_and_bound_stats_t& bnb_stats, + f_t upper_bound, + int max_num_tasks, + logger_t& log) +{ + constexpr f_t eps = 1e-6; + f_t start_time = bnb_stats.start_time; + i_t branch_var = fractional[0]; + f_t max_score = -1; i_t num_initialized_down; i_t num_initialized_up; f_t pseudo_cost_down_avg; @@ -432,39 +567,162 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio pseudo_cost_down_avg, pseudo_cost_up_avg); - for (i_t k = 0; k < num_fractional; k++) { - const i_t j = fractional[k]; - if (pseudo_cost_num_down[j] != 0) { - pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } else { - pseudo_cost_down[k] = pseudo_cost_down_avg; + const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; + const int64_t branch_and_bound_explored = bnb_stats.nodes_explored; + const i_t branch_and_bound_lp_iter_per_node = + branch_and_bound_lp_iters / bnb_stats.nodes_explored; + + i_t reliable_threshold = settings.reliability_branching; + if (reliable_threshold < 0) { + const i_t max_threshold = reliability_branching_settings.max_reliable_threshold; + const i_t min_threshold = reliability_branching_settings.min_reliable_threshold; + const f_t iter_factor = reliability_branching_settings.bnb_lp_factor; + const i_t iter_offset = reliability_branching_settings.bnb_lp_offset; + const int64_t alpha = iter_factor * branch_and_bound_lp_iters; + const int64_t max_reliability_iter = alpha + reliability_branching_settings.bnb_lp_offset; + + f_t iter_fraction = + (max_reliability_iter - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0); + iter_fraction = std::min(1.0, iter_fraction); + iter_fraction = std::max((alpha - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0), + iter_fraction); + reliable_threshold = (1 - iter_fraction) * min_threshold + iter_fraction * max_threshold; + reliable_threshold = strong_branching_lp_iter < max_reliability_iter ? reliable_threshold : 0; + } + + std::vector unreliable_list; + omp_mutex_t score_mutex; + + for (i_t j : fractional) { + if (pseudo_cost_num_down[j] < reliable_threshold || + pseudo_cost_num_up[j] < reliable_threshold) { + unreliable_list.push_back(j); + continue; } - if (pseudo_cost_num_up[j] != 0) { - pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } else { - pseudo_cost_up[k] = pseudo_cost_up_avg; + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + if (score > max_score) { + max_score = score; + branch_var = j; } - constexpr f_t eps = 1e-6; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - score[k] = - std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps); } - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t select = -1; - for (i_t k = 0; k < num_fractional; k++) { - if (score[k] > max_score) { - max_score = score[k]; - branch_var = fractional[k]; - select = k; + if (unreliable_list.empty()) { + log.printf( + "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); + + return branch_var; + } + + const int num_tasks = std::max(max_num_tasks, 1); + const int task_priority = reliability_branching_settings.task_priority; + const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; + const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); + + assert(task_priority > 0); + assert(max_num_candidates > 0); + assert(num_candidates > 0); + assert(num_tasks > 0); + + log.printf( + "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", + strong_branching_lp_iter.load(), + branch_and_bound_lp_iters, + unreliable_list.size(), + num_tasks, + reliable_threshold); + + // Shuffle the unreliable list so every variable has the same chance to be selected. + if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } + + if (toc(start_time) > settings.time_limit) { + log.printf("Time limit reached"); + return branch_var; + } + +#pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ + shared(score_mutex) + for (i_t i = 0; i < num_candidates; ++i) { + const i_t j = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + pseudo_cost_mutex_down[j].lock(); + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + f_t obj = trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + worker->leaf_problem.lower[j], + std::floor(solution[j]), + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + } } + pseudo_cost_mutex_down[j].unlock(); + + if (toc(start_time) > settings.time_limit) { continue; } + + pseudo_cost_mutex_up[j].lock(); + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t obj = trial_branching(worker->leaf_problem, + settings, + var_types, + node_ptr->vstatus, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + j, + std::ceil(solution[j]), + worker->leaf_problem.upper[j], + upper_bound, + branch_and_bound_lp_iter_per_node, + start_time, + reliability_branching_settings.upper_max_lp_iter, + reliability_branching_settings.lower_max_lp_iter, + strong_branching_lp_iter); + + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + } + } + pseudo_cost_mutex_up[j].unlock(); + + if (toc(start_time) > settings.time_limit) { continue; } + + f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; + } + score_mutex.unlock(); } log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); + "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); return branch_var; } @@ -475,8 +733,6 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, f_t lower_bound, logger_t& log) { - std::lock_guard lock(mutex); - const i_t num_fractional = fractional.size(); f_t estimate = lower_bound; @@ -487,26 +743,15 @@ f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - for (i_t k = 0; k < num_fractional; k++) { - const i_t j = fractional[k]; - f_t pseudo_cost_down = 0; - f_t pseudo_cost_up = 0; - - if (pseudo_cost_num_down[j] != 0) { - pseudo_cost_down = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; - } else { - pseudo_cost_down = pseudo_cost_down_avg; - } - - if (pseudo_cost_num_up[j] != 0) { - pseudo_cost_up = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; - } else { - pseudo_cost_up = pseudo_cost_up_avg; - } + for (i_t j : fractional) { constexpr f_t eps = 1e-6; - const f_t f_down = solution[j] - std::floor(solution[j]); - const f_t f_up = std::ceil(solution[j]) - solution[j]; - estimate += std::min(pseudo_cost_down * f_down, pseudo_cost_up * f_up); + i_t num_up = pseudo_cost_num_up[j]; + i_t num_down = pseudo_cost_num_down[j]; + f_t pc_up = num_up > 0 ? pseudo_cost_sum_up[j] / num_up : pseudo_cost_up_avg; + f_t pc_down = num_down > 0 ? pseudo_cost_sum_down[j] / num_down : pseudo_cost_down_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + estimate += std::min(pc_down * f_down, pc_up * f_up); } log.printf("pseudocost estimate = %e\n", estimate); diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 4bdde710f..1d3b0deef 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -7,16 +7,52 @@ #pragma once +#include +#include #include #include #include #include #include +#include #include namespace cuopt::linear_programming::dual_simplex { +template +struct reliability_branching_settings_t { + // Lower bound for the maximum number of LP iterations for a single trial branching + i_t lower_max_lp_iter = 10; + + // Upper bound for the maximum number of LP iterations for a single trial branching + i_t upper_max_lp_iter = 500; + + // Priority of the tasks created when running the trial branching in parallel. + // Set to 1 to have the same priority as the other tasks. + i_t task_priority = 5; + + // The maximum number of candidates initialized by strong branching in a single + // node + i_t max_num_candidates = 100; + + // Define the maximum number of iteration spent in strong branching. + // Let `bnb_lp_iter` = total number of iterations in B&B, then + // `max iter in strong branching = bnb_lp_factor * bnb_lp_iter + bnb_lp_offset`. + // This is used for determining the `reliable_threshold`. + f_t bnb_lp_factor = 0.5; + i_t bnb_lp_offset = 100000; + + // Maximum and minimum points in curve to determine the value + // of the `reliable_threshold` based on the current number of LP + // iterations in strong branching and B&B. Since it is a + // a curve, the actual value of `reliable_threshold` may be + // higher than `max_reliable_threshold`. + // Only used when `reliable_threshold` is negative + i_t max_reliable_threshold = 5; + i_t min_reliable_threshold = 1; +}; + template class pseudo_costs_t { public: @@ -24,7 +60,9 @@ class pseudo_costs_t { : pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), pseudo_cost_num_down(num_variables), - pseudo_cost_num_up(num_variables) + pseudo_cost_num_up(num_variables), + pseudo_cost_mutex_up(num_variables), + pseudo_cost_mutex_down(num_variables) { } @@ -36,6 +74,8 @@ class pseudo_costs_t { pseudo_cost_sum_up.assign(num_variables, 0); pseudo_cost_num_down.assign(num_variables, 0); pseudo_cost_num_up.assign(num_variables, 0); + pseudo_cost_mutex_up.resize(num_variables); + pseudo_cost_mutex_down.resize(num_variables); } void initialized(i_t& num_initialized_down, @@ -52,17 +92,37 @@ class pseudo_costs_t { const std::vector& solution, logger_t& log); + i_t reliable_variable_selection(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + branch_and_bound_worker_t* worker, + const branch_and_bound_stats_t& bnb_stats, + f_t upper_bound, + int max_num_tasks, + logger_t& log); + void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); - std::vector pseudo_cost_sum_up; - std::vector pseudo_cost_sum_down; - std::vector pseudo_cost_num_up; - std::vector pseudo_cost_num_down; + + f_t calculate_pseudocost_score(i_t j, + const std::vector& solution, + f_t pseudo_cost_up_avg, + f_t pseudo_cost_down_avg) const; + + reliability_branching_settings_t reliability_branching_settings; + + std::vector> pseudo_cost_sum_up; + std::vector> pseudo_cost_sum_down; + std::vector> pseudo_cost_num_up; + std::vector> pseudo_cost_num_down; std::vector strong_branch_down; std::vector strong_branch_up; - - omp_mutex_t mutex; + std::vector pseudo_cost_mutex_up; + std::vector pseudo_cost_mutex_down; omp_atomic_t num_strong_branches_completed = 0; + omp_atomic_t strong_branching_lp_iter = 0; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index f9911ee53..34c384949 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -22,18 +22,25 @@ namespace cuopt::linear_programming::dual_simplex { template struct diving_heuristics_settings_t { - i_t num_diving_workers = -1; - // -1 automatic, 0 disabled, 1 enabled i_t line_search_diving = -1; i_t pseudocost_diving = -1; i_t guided_diving = -1; i_t coefficient_diving = -1; - i_t min_node_depth = 10; - i_t node_limit = 500; + // The minimum depth to start diving from. + i_t min_node_depth = 10; + + // The maximum number of nodes when performing a dive. + i_t node_limit = 500; + + // The maximum number of dual simplex iteration allowed + // in a single dive. This set in terms of the total number of + // iterations in the best-first threads. f_t iteration_limit_factor = 0.05; - i_t backtrack_limit = 5; + + // The maximum backtracking allowed. + i_t backtrack_limit = 5; }; template @@ -95,16 +102,14 @@ struct simplex_solver_settings_t { reduced_cost_strengthening(-1), cut_change_threshold(1e-3), cut_min_orthogonality(0.5), - num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), + reliability_branching(-1), inside_mip(0), sub_mip(0), - reliability_branching(-1), solution_callback(nullptr), heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) { - diving_settings.num_diving_workers = std::max(num_threads - num_bfs_workers, 1); } void set_log(bool logging) const { log.log = logging; } @@ -175,15 +180,20 @@ struct simplex_solver_settings_t { // strengthening f_t cut_change_threshold; // threshold for cut change f_t cut_min_orthogonality; // minimum orthogonality for cuts - i_t num_bfs_workers; // number of threads dedicated to the best-first search i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if // using batch PDLP for strong branching diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + // Settings for the reliability branching. + // - -1: automatic + // - 0: disable (use pseudocost branching instead) + // - k > 0, a variable is considered reliable if it has been branched on k times. + i_t reliability_branching; + i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node i_t sub_mip; // 0 if in regular MIP solve, 1 if in sub-MIP solve - i_t reliability_branching; // -1 automatic, 0 to disable, >0 to enable reliability branching + std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp index a678e2fd7..5739cedae 100644 --- a/cpp/src/dual_simplex/solution.hpp +++ b/cpp/src/dual_simplex/solution.hpp @@ -72,8 +72,8 @@ class mip_solution_t { std::vector x; f_t objective; f_t lower_bound; - i_t nodes_explored; - i_t simplex_iterations; + int64_t nodes_explored; + int64_t simplex_iterations; bool has_incumbent; }; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 9dc6ac9c5..0858bb75a 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -98,6 +98,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}, {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, + {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1} }; // Bool parameters diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index cb086e145..7b05f1964 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -259,20 +259,13 @@ void rins_t::run_rins() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = std::min(current_mip_gap, (f_t)settings.target_mip_gap); - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_workers = 1; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.sub_mip = 1; - - // In the future, let RINS use all the diving heuristics. For now, - // restricting to guided diving. - branch_and_bound_settings.diving_settings.num_diving_workers = 1; - branch_and_bound_settings.diving_settings.line_search_diving = 0; - branch_and_bound_settings.diving_settings.coefficient_diving = 0; - branch_and_bound_settings.diving_settings.pseudocost_diving = 0; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 1; + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index e636e7471..6e1c4d4ae 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -104,19 +104,12 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_workers = 1; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.sub_mip = 1; - - // In the future, let SubMIP use all the diving heuristics. For now, - // restricting to guided diving. - branch_and_bound_settings.diving_settings.num_diving_workers = 1; - branch_and_bound_settings.diving_settings.line_search_diving = 0; - branch_and_bound_settings.diving_settings.coefficient_diving = 0; - branch_and_bound_settings.diving_settings.pseudocost_diving = 0; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 1; + branch_and_bound_settings.reliability_branching = 0; + branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 2829babc9..8583a76f8 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -180,9 +180,10 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; - branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.reliability_branching = solver_settings_.reliability_branching; + branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; + branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; branch_and_bound_settings.mixed_integer_gomory_cuts = context.settings.mixed_integer_gomory_cuts; branch_and_bound_settings.knapsack_cuts = context.settings.knapsack_cuts; @@ -192,21 +193,15 @@ solution_t mip_solver_t::run_solver() context.settings.reduced_cost_strengthening; branch_and_bound_settings.cut_change_threshold = context.settings.cut_change_threshold; branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; + branch_and_bound_settings.mip_batch_pdlp_strong_branching = + context.settings.mip_batch_pdlp_strong_branching; if (context.settings.num_cpu_threads < 0) { - branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; + branch_and_bound_settings.num_threads = std::max(1, omp_get_max_threads() - 1); } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } - i_t num_threads = branch_and_bound_settings.num_threads; - i_t num_bfs_workers = std::max(1, num_threads / 4); - i_t num_diving_workers = std::max(1, num_threads - num_bfs_workers); - branch_and_bound_settings.num_bfs_workers = num_bfs_workers; - branch_and_bound_settings.diving_settings.num_diving_workers = num_diving_workers; - branch_and_bound_settings.mip_batch_pdlp_strong_branching = - context.settings.mip_batch_pdlp_strong_branching; - // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = std::bind(&branch_and_bound_solution_helper_t::solution_callback, diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index e1b68bf88..f6e66472d 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -10,7 +10,8 @@ #ifdef _OPENMP #include -#include +#include +#include namespace cuopt { @@ -18,14 +19,39 @@ namespace cuopt { // https://www.openmp.org/spec-html/5.1/openmpse39.html#x224-2570003.9 class omp_mutex_t { public: - omp_mutex_t() { omp_init_lock(&mutex); } - virtual ~omp_mutex_t() { omp_destroy_lock(&mutex); } - void lock() { omp_set_lock(&mutex); } - void unlock() { omp_unset_lock(&mutex); } - bool try_lock() { return omp_test_lock(&mutex); } + omp_mutex_t() : mutex(new omp_lock_t) { omp_init_lock(mutex.get()); } + + omp_mutex_t(const omp_mutex_t&) = delete; + + omp_mutex_t(omp_mutex_t&& other) { *this = std::move(other); } + + omp_mutex_t& operator=(const omp_mutex_t&) = delete; + + omp_mutex_t& operator=(omp_mutex_t&& other) + { + if (&other != this) { + if (mutex) { omp_destroy_lock(mutex.get()); } + mutex = std::move(other.mutex); + } + return *this; + } + + virtual ~omp_mutex_t() + { + if (mutex) { + omp_destroy_lock(mutex.get()); + mutex.reset(); + } + } + + void lock() { omp_set_lock(mutex.get()); } + + void unlock() { omp_unset_lock(mutex.get()); } + + bool try_lock() { return omp_test_lock(mutex.get()); } private: - omp_lock_t mutex; + std::unique_ptr mutex; }; // Wrapper for omp atomic operations. See @@ -42,7 +68,7 @@ class omp_atomic_t { return new_val; } - operator T() { return load(); } + operator T() const { return load(); } T operator+=(T inc) { return fetch_add(inc) + inc; } T operator-=(T inc) { return fetch_sub(inc) - inc; } diff --git a/cpp/src/utilities/pcgenerator.hpp b/cpp/src/utilities/pcgenerator.hpp new file mode 100644 index 000000000..29a865f02 --- /dev/null +++ b/cpp/src/utilities/pcgenerator.hpp @@ -0,0 +1,159 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +// Copied from raft/PCGenerator (rng_device.cuh). We cannot use raft implementation +// on the CPU (.cpp file) since the raft header includes CUDA code. +// The original code is from https://www.pcg-random.org/. +namespace cuopt { +class pcgenerator_t { + public: + static constexpr uint64_t default_seed = 0x853c49e6748fea9bULL; + static constexpr uint64_t default_stream = 0xda3e39cb94b95bdbULL; + + /** + * @brief ctor. Initializes the PCG + * @param rng_state is the generator state used for initializing the generator + * @param subsequence specifies the subsequence to be generated out of 2^64 possible subsequences + * In a parallel setting, like threads of a CUDA kernel, each thread is required to generate a + * unique set of random numbers. This can be achieved by initializing the generator with same + * rng_state for all the threads and diststreamt values for subsequence. + */ + pcgenerator_t(const uint64_t seed = default_seed, + const uint64_t subsequence = default_stream, + uint64_t offset = 0) + { + set_seed(seed, subsequence, offset); + } + + // Set the seed, subsequence and offset of the PCG + void set_seed(uint64_t seed, const uint64_t subsequence = default_stream, uint64_t offset = 0) + { + state = uint64_t(0); + stream = (subsequence << 1u) | 1u; + uint32_t discard; + next(discard); + state += seed; + next(discard); + skipahead(offset); + } + + // Based on "Random Number Generation with Arbitrary Strides" F. B. Brown + // Link https://mcnp.lanl.gov/pdf_files/anl-rn-arb-stride.pdf + void skipahead(uint64_t offset) + { + uint64_t G = 1; + uint64_t h = 6364136223846793005ULL; + uint64_t C = 0; + uint64_t f = stream; + while (offset) { + if (offset & 1) { + G = G * h; + C = C * h + f; + } + f = f * (h + 1); + h = h * h; + offset >>= 1; + } + state = state * G + C; + } + + /** + * @defgroup NextRand Generate the next random number + * @brief This code is derived from PCG basic code + * @{ + */ + uint32_t next_u32() + { + uint32_t ret; + uint64_t oldstate = state; + state = oldstate * 6364136223846793005ULL + stream; + uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; + uint32_t rot = oldstate >> 59u; + ret = (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); + return ret; + } + + uint64_t next_u64() + { + uint64_t ret; + uint32_t a, b; + a = next_u32(); + b = next_u32(); + ret = uint64_t(a) | (uint64_t(b) << 32); + return ret; + } + + int32_t next_i32() + { + int32_t ret; + uint32_t val; + val = next_u32(); + ret = int32_t(val & 0x7fffffff); + return ret; + } + + int64_t next_i64() + { + int64_t ret; + uint64_t val; + val = next_u64(); + ret = int64_t(val & 0x7fffffffffffffff); + return ret; + } + + float next_float() { return static_cast((next_u32() >> 8) * 0x1.0p-24); } + + double next_double() { return static_cast((next_u64() >> 11) * 0x1.0p-53); } + + template + T next() + { + T val; + next(val); + return val; + } + + void next(uint32_t& ret) { ret = next_u32(); } + void next(uint64_t& ret) { ret = next_u64(); } + void next(int32_t& ret) { ret = next_i32(); } + void next(int64_t& ret) { ret = next_i64(); } + void next(float& ret) { ret = next_float(); } + void next(double& ret) { ret = next_double(); } + + /// Draws a sample from a uniform distribution. The samples are uniformly distributed over + /// the semi-closed interval `[low, high)`. This routine may have a **slight bias** toward + /// some numbers in the range (scaling by floating-point). + template + T uniform(T low, T high) + { + double val = next_double(); + T range = high - low; + return low + (val * range); + } + + // Shuffles the contents of a sequence using the Fisher–Yates algorithm. + template + void shuffle(std::vector& seq) + { + if (seq.empty()) { return; } + for (size_t i = 0; i < seq.size() - 1; ++i) { + size_t j = uniform(i, seq.size()); + if (j != i) std::swap(seq[i], seq[j]); + } + } + + private: + uint64_t state; + uint64_t stream; +}; +} // namespace cuopt