diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp index f1bf52c1e..c56c9db98 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.cpp +++ b/cpp/src/dual_simplex/bounds_strengthening.cpp @@ -154,7 +154,7 @@ bool bounds_strengthening_t::bounds_strengthening( bool is_infeasible = check_infeasibility(min_a, max_a, cnst_lb, cnst_ub, settings.primal_tol); if (is_infeasible) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible constraint %d, cnst_lb %e, cnst_ub %e, min_a %e, max_a %e\n", iter, i, @@ -211,7 +211,7 @@ bool bounds_strengthening_t::bounds_strengthening( new_ub = std::min(new_ub, upper_bounds[k]); if (new_lb > new_ub + 1e-6) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); return false; } diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 6161f4d3f..ff55d06f7 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -244,25 +244,15 @@ f_t branch_and_bound_t::get_upper_bound() template f_t branch_and_bound_t::get_lower_bound() { - f_t lower_bound = lower_bound_ceiling_.load(); - mutex_heap_.lock(); - if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } - mutex_heap_.unlock(); + f_t lower_bound = lower_bound_ceiling_.load(); + f_t heap_lower_bound = node_queue.get_lower_bound(); + lower_bound = std::min(heap_lower_bound, lower_bound); for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); } - return lower_bound; -} - -template -i_t branch_and_bound_t::get_heap_size() -{ - mutex_heap_.lock(); - i_t size = heap_.size(); - mutex_heap_.unlock(); - return size; + return std::isfinite(lower_bound) ? lower_bound : -inf; } template @@ -589,6 +579,7 @@ node_solve_info_t branch_and_bound_t::solve_node( bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log) { const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; @@ -605,6 +596,13 @@ node_solve_info_t branch_and_bound_t::solve_node( lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; + if (thread_type != thread_type_t::EXPLORATION) { + i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + f_t max_iter = 0.05 * bnb_lp_iters; + lp_settings.iteration_limit = max_iter - stats.total_lp_iters; + if (lp_settings.iteration_limit <= 0) { return node_solve_info_t::ITERATION_LIMIT; } + } + #ifdef LOG_NODE_SIMPLEX lp_settings.set_log(true); std::stringstream ss; @@ -679,10 +677,8 @@ node_solve_info_t branch_and_bound_t::solve_node( lp_status = convert_lp_status_to_dual_status(second_status); } - if (thread_type == thread_type_t::EXPLORATION) { - exploration_stats_.total_lp_solve_time += toc(lp_start_time); - exploration_stats_.total_lp_iters += node_iter; - } + stats.total_lp_solve_time += toc(lp_start_time); + stats.total_lp_iters += node_iter; } if (lp_status == dual::status_t::DUAL_UNBOUNDED) { @@ -726,8 +722,9 @@ node_solve_info_t branch_and_bound_t::solve_node( } else if (leaf_objective <= upper_bound + abs_fathom_tol) { // Choose fractional variable to branch on - const i_t branch_var = - pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); + auto [branch_var, obj_estimate] = pc_.variable_selection_and_obj_estimate( + leaf_fractional, leaf_solution.x, node_ptr->lower_bound, log); + node_ptr->objective_estimate = obj_estimate; assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( @@ -751,6 +748,9 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); return node_solve_info_t::TIME_LIMIT; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + return node_solve_info_t::ITERATION_LIMIT; + } else { if (thread_type == thread_type_t::EXPLORATION) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); @@ -854,6 +854,7 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod true, original_lp_.lower, original_lp_.upper, + exploration_stats_, settings_.log); ++exploration_stats_.nodes_since_last_log; @@ -877,23 +878,21 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod } else { // We've generated enough nodes, push further nodes onto the heap - mutex_heap_.lock(); - heap_.push(node->get_down_child()); - heap_.push(node->get_up_child()); - mutex_heap_.unlock(); + node_queue.push(node->get_down_child()); + node_queue.push(node->get_up_child()); } } } template -void branch_and_bound_t::explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) +void branch_and_bound_t::plunge_from(i_t task_id, + mip_node_t* start_node, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list) { bool recompute_bounds_and_basis = true; std::deque*> stack; @@ -922,6 +921,7 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree.update(node_ptr, node_status_t::FATHOMED); + recompute_bounds_and_basis = true; --exploration_stats_.nodes_unexplored; continue; } @@ -977,6 +977,7 @@ void branch_and_bound_t::explore_subtree(i_t task_id, recompute_bounds_and_basis, original_lp_.lower, original_lp_.upper, + exploration_stats_, settings_.log); recompute_bounds_and_basis = !has_children(status); @@ -997,28 +998,7 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); - - // The order here matters. We want to create a copy of the node - // before adding to the global heap. Otherwise, - // some thread may consume the node (possibly fathoming it) - // before we had the chance to add to the diving queue. - // This lead to a SIGSEGV. Although, in this case, it - // would be better if we discard the node instead. - if (get_heap_size() > settings_.num_bfs_threads) { - std::vector lower = original_lp_.lower; - std::vector upper = original_lp_.upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - mutex_dive_queue_.lock(); - diving_queue_.emplace(node->detach_copy(), std::move(lower), std::move(upper)); - mutex_dive_queue_.unlock(); - } - - mutex_heap_.lock(); - heap_.push(node); - mutex_heap_.unlock(); + node_queue.push(node); } exploration_stats_.nodes_unexplored += 2; @@ -1056,37 +1036,31 @@ void branch_and_bound_t::best_first_thread(i_t task_id, while (solver_status_ == mip_exploration_status_t::RUNNING && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - mip_node_t* start_node = nullptr; - + (active_subtrees_ > 0 || node_queue.best_first_queue_size() > 0)) { // If there any node left in the heap, we pop the top node and explore it. - mutex_heap_.lock(); - if (heap_.size() > 0) { - start_node = heap_.top(); - heap_.pop(); - active_subtrees_++; - } - mutex_heap_.unlock(); + std::optional*> start_node = node_queue.pop_best_first(active_subtrees_); + + if (start_node.has_value()) { + mip_node_t* node = start_node.value(); - if (start_node != nullptr) { - if (get_upper_bound() < start_node->lower_bound) { + if (get_upper_bound() < node->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); - search_tree.update(start_node, node_status_t::FATHOMED); + search_tree.graphviz_node(settings_.log, node, "cutoff", node->lower_bound); + search_tree.update(node, node_status_t::FATHOMED); active_subtrees_--; continue; } // Best-first search with plunging - explore_subtree(task_id, - start_node, - search_tree, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); + plunge_from(task_id, + node, + search_tree, + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list); active_subtrees_--; } @@ -1123,23 +1097,39 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A std::vector basic_list(m); std::vector nonbasic_list; + std::vector start_lower; + std::vector start_upper; + bool reset_starting_bounds = true; + while (solver_status_ == mip_exploration_status_t::RUNNING && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - std::optional> start_node; + (active_subtrees_ > 0 || node_queue.best_first_queue_size() > 0)) { + if (reset_starting_bounds) { + start_lower = original_lp_.lower; + start_upper = original_lp_.upper; + std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); + reset_starting_bounds = false; + } - mutex_dive_queue_.lock(); - if (diving_queue_.size() > 0) { start_node = diving_queue_.pop(); } - mutex_dive_queue_.unlock(); + std::optional> start_node = + node_queue.pop_diving(start_lower, start_upper, node_presolver.bounds_changed); if (start_node.has_value()) { - if (get_upper_bound() < start_node->node.lower_bound) { continue; } + reset_starting_bounds = true; + + bool is_feasible = node_presolver.bounds_strengthening(start_lower, start_upper, settings_); + if (get_upper_bound() < start_node->lower_bound || !is_feasible) { continue; } bool recompute_bounds_and_basis = true; - i_t nodes_explored = 0; - search_tree_t subtree(std::move(start_node->node)); + search_tree_t subtree(std::move(start_node.value())); std::deque*> stack; stack.push_front(&subtree.root); + bnb_stats_t dive_stats; + dive_stats.total_lp_iters = 0; + dive_stats.total_lp_solve_time = 0; + dive_stats.nodes_explored = 0; + dive_stats.nodes_unexplored = 0; + while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -1151,9 +1141,8 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A continue; } - if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; } - - if (nodes_explored >= 1000) { break; } + if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } + if (dive_stats.nodes_explored > 500) { break; } node_solve_info_t status = solve_node(node_ptr, subtree, @@ -1164,17 +1153,19 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A node_presolver, thread_type_t::DIVING, recompute_bounds_and_basis, - start_node->lower, - start_node->upper, + start_lower, + start_upper, + dive_stats, log); - - nodes_explored++; - + dive_stats.nodes_explored++; recompute_bounds_and_basis = !has_children(status); if (status == node_solve_info_t::TIME_LIMIT) { solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + break; + + } else if (status == node_solve_info_t::ITERATION_LIMIT) { + break; } else if (has_children(status)) { if (status == node_solve_info_t::UP_CHILD_FIRST) { @@ -1186,24 +1177,8 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A } } - if (stack.size() > 1) { - // If the diving thread is consuming the nodes faster than the - // best first search, then we split the current subtree at the - // lowest possible point and move to the queue, so it can - // be picked by another thread. - if (std::lock_guard lock(mutex_dive_queue_); - diving_queue_.size() < min_diving_queue_size_) { - mip_node_t* new_node = stack.back(); - stack.pop_back(); - - std::vector lower = start_node->lower; - std::vector upper = start_node->upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - new_node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - diving_queue_.emplace(new_node->detach_copy(), std::move(lower), std::move(upper)); - } + if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > 5) { + stack.pop_back(); } } } @@ -1426,7 +1401,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } // Choose variable to branch on - i_t branch_var = pc_.variable_selection(fractional, root_relax_soln_.x, log); + auto [branch_var, obj_estimate] = + pc_.variable_selection_and_obj_estimate(fractional, root_relax_soln_.x, root_objective_, log); search_tree_.root = std::move(mip_node_t(root_objective_, root_vstatus_)); search_tree_.num_nodes = 0; @@ -1455,7 +1431,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); active_subtrees_ = 0; - min_diving_queue_size_ = 4 * settings_.num_diving_threads; solver_status_ = mip_exploration_status_t::RUNNING; lower_bound_ceiling_ = inf; should_report_ = true; @@ -1491,7 +1466,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree_.root.lower_bound; + f_t lower_bound = node_queue.best_first_queue_size() > 0 ? node_queue.get_lower_bound() + : search_tree_.root.lower_bound; return set_final_solution(solution, lower_bound); } diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 38438cc9e..032eed11f 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -7,9 +7,9 @@ #pragma once -#include #include #include +#include #include #include #include @@ -20,7 +20,6 @@ #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { @@ -68,12 +67,22 @@ class bounds_strengthening_t; template void upper_bound_callback(f_t upper_bound); +template +struct bnb_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + + // This should only be used by the main thread + omp_atomic_t last_log = 0.0; + omp_atomic_t nodes_since_last_log = 0; +}; + template class branch_and_bound_t { public: - template - using mip_node_heap_t = std::priority_queue, node_compare_t>; - branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings); @@ -111,7 +120,6 @@ class branch_and_bound_t { f_t get_upper_bound(); f_t get_lower_bound(); - i_t get_heap_size(); bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -145,17 +153,7 @@ class branch_and_bound_t { mip_solution_t incumbent_; // Structure with the general info of the solver. - struct stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; - } exploration_stats_; + bnb_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -175,9 +173,8 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes to be explored. - omp_mutex_t mutex_heap_; - mip_node_heap_t*> heap_; + // Heap storing the nodes waiting to be explored. + node_queue_t node_queue; // Search tree search_tree_t search_tree_; @@ -185,11 +182,6 @@ class branch_and_bound_t { // Count the number of subtrees that are currently being explored. omp_atomic_t active_subtrees_; - // Queue for storing the promising node for performing dives. - omp_mutex_t mutex_dive_queue_; - diving_queue_t diving_queue_; - i_t min_diving_queue_size_; - // Global status of the solver. omp_atomic_t solver_status_; @@ -219,15 +211,15 @@ class branch_and_bound_t { const csr_matrix_t& Arow, i_t initial_heap_size); - // Explore the search tree using the best-first search with plunging strategy. - void explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); + // Perform a plunge in the subtree determined by the `start_node`. + void plunge_from(i_t task_id, + mip_node_t* start_node, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list); // Each "main" thread pops a node from the global heap and then performs a plunge // (i.e., a shallow dive) into the subtree determined by the node. @@ -235,6 +227,16 @@ class branch_and_bound_t { search_tree_t& search_tree, const csr_matrix_t& Arow); + // Perform a deep dive in the subtree determined by the `start_node`. + void dive_from(mip_node_t& start_node, + const std::vector& start_lower, + const std::vector& start_upper, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + thread_type_t diving_type); // Each diving thread pops the first node from the dive queue and then performs // a deep dive into the subtree determined by the node. void diving_thread(const csr_matrix_t& Arow); @@ -251,6 +253,7 @@ class branch_and_bound_t { bool recompute_basis_and_bounds, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log); // Sort the children based on the Martin's criteria. diff --git a/cpp/src/dual_simplex/diving_queue.hpp b/cpp/src/dual_simplex/diving_queue.hpp deleted file mode 100644 index f7035109e..000000000 --- a/cpp/src/dual_simplex/diving_queue.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include - -#include - -namespace cuopt::linear_programming::dual_simplex { - -template -struct diving_root_t { - mip_node_t node; - std::vector lower; - std::vector upper; - - diving_root_t(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - : node(std::move(node)), lower(std::move(lower)), upper(std::move(upper)) - { - } - - friend bool operator>(const diving_root_t& a, const diving_root_t& b) - { - return a.node.lower_bound > b.node.lower_bound; - } -}; - -// A min-heap for storing the starting nodes for the dives. -// This has a maximum size of 1024, such that the container -// will discard the least promising node if the queue is full. -template -class diving_queue_t { - private: - std::vector> buffer; - static constexpr i_t max_size_ = 1024; - - public: - diving_queue_t() { buffer.reserve(max_size_); } - - void push(diving_root_t&& node) - { - buffer.push_back(std::move(node)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - void emplace(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - { - buffer.emplace_back(std::move(node), std::move(lower), std::move(upper)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - diving_root_t pop() - { - std::pop_heap(buffer.begin(), buffer.end(), std::greater<>()); - diving_root_t node = std::move(buffer.back()); - buffer.pop_back(); - return node; - } - - i_t size() const { return buffer.size(); } - constexpr i_t max_size() const { return max_size_; } - const diving_root_t& top() const { return buffer.front(); } - void clear() { buffer.clear(); } -}; - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 1d66a21f7..a082932ac 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -45,6 +45,7 @@ class mip_node_t { branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), + objective_estimate(std::numeric_limits::infinity()), vstatus(0) { children[0] = nullptr; @@ -59,6 +60,7 @@ class mip_node_t { node_id(0), branch_var(-1), branch_dir(rounding_direction_t::NONE), + objective_estimate(std::numeric_limits::infinity()), vstatus(basis) { children[0] = nullptr; @@ -80,6 +82,7 @@ class mip_node_t { branch_var(branch_variable), branch_dir(branch_direction), fractional_val(branch_var_value), + objective_estimate(parent_node->objective_estimate), vstatus(basis) { @@ -227,17 +230,19 @@ class mip_node_t { mip_node_t detach_copy() const { mip_node_t copy(lower_bound, vstatus); - copy.branch_var = branch_var; - copy.branch_dir = branch_dir; - copy.branch_var_lower = branch_var_lower; - copy.branch_var_upper = branch_var_upper; - copy.fractional_val = fractional_val; - copy.node_id = node_id; + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.objective_estimate = objective_estimate; + copy.node_id = node_id; return copy; } node_status_t status; f_t lower_bound; + f_t objective_estimate; i_t depth; i_t node_id; i_t branch_var; @@ -262,22 +267,6 @@ void remove_fathomed_nodes(std::vector*>& stack) } } -template -class node_compare_t { - public: - bool operator()(const mip_node_t& a, const mip_node_t& b) const - { - return a.lower_bound > - b.lower_bound; // True if a comes before b, elements that come before are output last - } - - bool operator()(const mip_node_t* a, const mip_node_t* b) const - { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - } -}; - template class search_tree_t { public: diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp new file mode 100644 index 000000000..0234fa038 --- /dev/null +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -0,0 +1,173 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// This is a generic heap implementation based +// on the STL functions. The main benefit here is +// that we access the underlying container. +template +class heap_t { + public: + heap_t() = default; + virtual ~heap_t() = default; + + void push(const T& node) + { + buffer.push_back(node); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + void push(T&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + template + void emplace(Args&&... args) + { + buffer.emplace_back(std::forward(args)...); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + std::optional pop() + { + if (buffer.empty()) return std::nullopt; + + std::pop_heap(buffer.begin(), buffer.end(), comp); + T node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + size_t size() const { return buffer.size(); } + T& top() { return buffer.front(); } + void clear() { buffer.clear(); } + bool empty() const { return buffer.empty(); } + + private: + std::vector buffer; + Comp comp; +}; + +// A queue storing the nodes waiting to be explored/dived from. +template +class node_queue_t { + private: + struct heap_entry_t { + mip_node_t* node = nullptr; + f_t lower_bound = -inf; + f_t score = inf; + + heap_entry_t(mip_node_t* new_node) + : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) + { + } + }; + + // Comparision function for ordering the nodes based on their lower bound with + // lowest one being explored first. + struct lower_bound_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->lower_bound > b->lower_bound; + } + }; + + // Comparision function for ordering the nodes based on some score (currently the pseudocost + // estimate) with the lowest being explored first. + struct score_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->score > b->score; + } + }; + + heap_t, lower_bound_comp> best_first_heap; + heap_t, score_comp> diving_heap; + omp_mutex_t mutex; + + public: + void push(mip_node_t* new_node) + { + std::lock_guard lock(mutex); + auto entry = std::make_shared(new_node); + best_first_heap.push(entry); + diving_heap.push(entry); + } + + // In the current implementation, we are use the active number of subtree to decide + // when to stop the execution. We need to increment the counter at the same + // time as we pop a node from the queue to avoid some threads exiting + // the main loop thinking that the solver has already finished. + // This will be not needed in the master-worker model. + std::optional*> pop_best_first(omp_atomic_t& active_subtree) + { + std::lock_guard lock(mutex); + auto entry = best_first_heap.pop(); + + if (entry.has_value()) { + active_subtree++; + return std::exchange(entry.value()->node, nullptr); + } + + return std::nullopt; + } + + // In the current implementation, multiple threads can pop the nodes + // from the queue, so we need to pass the lower and upper bound here + // to avoid other thread fathoming the node (i.e., deleting) before we can read + // the variable bounds from the tree. + // This will be not needed in the master-worker model. + std::optional> pop_diving(std::vector& lower, + std::vector& upper, + std::vector& bounds_changed) + { + std::lock_guard lock(mutex); + + while (!diving_heap.empty()) { + auto entry = diving_heap.pop(); + + if (entry.has_value()) { + if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { + node_ptr->get_variable_bounds(lower, upper, bounds_changed); + return node_ptr->detach_copy(); + } + } + } + + return std::nullopt; + } + + i_t diving_queue_size() + { + std::lock_guard lock(mutex); + return diving_heap.size(); + } + + i_t best_first_queue_size() + { + std::lock_guard lock(mutex); + return best_first_heap.size(); + } + + f_t get_lower_bound() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 9f84e108d..1c0a33042 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -199,7 +199,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - mutex.lock(); + std::lock_guard lock(mutex); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -211,7 +211,6 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } - mutex.unlock(); } template @@ -254,16 +253,19 @@ void pseudo_costs_t::initialized(i_t& num_initialized_down, } template -i_t pseudo_costs_t::variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log) +std::pair pseudo_costs_t::variable_selection_and_obj_estimate( + const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) { - mutex.lock(); + std::lock_guard lock(mutex); const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); std::vector pseudo_cost_down(num_fractional); std::vector score(num_fractional); + f_t estimate = lower_bound; i_t num_initialized_down; i_t num_initialized_up; @@ -272,11 +274,11 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + log.debug("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); for (i_t k = 0; k < num_fractional; k++) { const i_t j = fractional[k]; @@ -296,6 +298,9 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const f_t f_up = std::ceil(solution[j]) - solution[j]; score[k] = std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps); + + estimate += std::min(std::max(pseudo_cost_down[k] * f_down, eps), + std::max(pseudo_cost_up[k] * f_up, eps)); } i_t branch_var = fractional[0]; @@ -309,12 +314,13 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio } } - log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); - - mutex.unlock(); + log.debug("Pseudocost branching on %d. Value %e. Score %e. Obj Estimate %e\n", + branch_var, + solution[branch_var], + score[select], + estimate); - return branch_var; + return {branch_var, estimate}; } template diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 799cdc3ff..ab01b2a85 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -43,9 +43,10 @@ class pseudo_costs_t { f_t& pseudo_cost_down_avg, f_t& pseudo_cost_up_avg) const; - i_t variable_selection(const std::vector& fractional, - const std::vector& solution, - logger_t& log); + std::pair variable_selection_and_obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log); void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln);