diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 6013dcaf5..60326f562 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -377,8 +377,6 @@ int main(int argc, char* argv[]) double memory_limit = program.get("--memory-limit"); bool track_allocations = program.get("--track-allocations")[0] == 't'; - if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } - if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); result_file = out_dir + "/final_result.csv"; @@ -415,6 +413,7 @@ int main(int argc, char* argv[]) paths.push_back(entry.path()); } } + if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } // if batch_num is given, trim the paths to only concerned batch if (batch_num != -1) { if (n_batches <= 0) { @@ -481,6 +480,7 @@ int main(int argc, char* argv[]) } merge_result_files(out_dir, result_file, n_gpus, batch_num); } else { + if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads(); } auto memory_resource = make_async(); if (memory_limit > 0) { auto limiting_adaptor = diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4391c53df..2e782f0ff 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -76,6 +76,13 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) endif(CMAKE_COMPILER_IS_GNUCXX) +# Undefine NDEBUG if assert mode is on +if(DEFINE_ASSERT) + message(STATUS "Undefining NDEBUG with assert mode enabled") + add_definitions(-UNDEBUG) + set(CMAKE_BUILD_TYPE RelWithDebInfo) +endif() + # To use sanitizer with cuda runtime, one must follow a few steps: # 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' # 2. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 @@ -143,15 +150,10 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) elseif(CMAKE_CUDA_LINEINFO) message(STATUS "Enabling line info") list(APPEND CUOPT_CUDA_FLAGS -lineinfo) + list(APPEND CUOPT_CXX_FLAGS -lineinfo) set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -lineinfo") endif(CMAKE_BUILD_TYPE MATCHES Debug) -# Undefine NDEBUG if assert mode is on -if(DEFINE_ASSERT) - message(STATUS "Undefining NDEBUG with assert mode enabled") - add_definitions(-UNDEBUG) -endif() - # ################################################################################################## # - find CPM based dependencies ------------------------------------------------------------------ diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index eecafb14e..cee7fedd2 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -579,9 +579,12 @@ node_solve_info_t branch_and_bound_t::solve_node( mip_node_t* node_ptr, search_tree_t& search_tree, lp_problem_t& leaf_problem, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, thread_type_t thread_type, - bool recompute_bounds, + bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, logger_t& log) @@ -595,9 +598,10 @@ node_solve_info_t branch_and_bound_t::solve_node( simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings_.dual_tol; - lp_settings.inside_mip = 2; - lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); + lp_settings.cut_off = upper_bound + settings_.dual_tol; + lp_settings.inside_mip = 2; + lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); + lp_settings.scale_columns = false; #ifdef LOG_NODE_SIMPLEX lp_settings.set_log(true); @@ -623,7 +627,7 @@ node_solve_info_t branch_and_bound_t::solve_node( std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); // Set the correct bounds for the leaf problem - if (recompute_bounds) { + if (recompute_bounds_and_basis) { leaf_problem.lower = root_lower; leaf_problem.upper = root_upper; node_ptr->get_variable_bounds( @@ -644,20 +648,32 @@ node_solve_info_t branch_and_bound_t::solve_node( f_t lp_start_time = tic(); std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - lp_status = dual_phase2(2, - 0, - lp_start_time, - leaf_problem, - lp_settings, - leaf_vstatus, - leaf_solution, - node_iter, - leaf_edge_norms); + lp_status = dual_phase2_with_advanced_basis(2, + 0, + recompute_bounds_and_basis, + lp_start_time, + leaf_problem, + lp_settings, + leaf_vstatus, + basis_factors, + basic_list, + nonbasic_list, + leaf_solution, + node_iter, + leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { log.printf("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); - lp_status_t second_status = solve_linear_program_advanced( - leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status_t second_status = solve_linear_program_with_advanced_basis(leaf_problem, + lp_start_time, + lp_settings, + leaf_solution, + basis_factors, + basic_list, + nonbasic_list, + leaf_vstatus, + leaf_edge_norms); + lp_status = convert_lp_status_to_dual_status(second_status); } @@ -714,7 +730,7 @@ node_solve_info_t branch_and_bound_t::solve_node( assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, leaf_problem, log); - node_ptr->status = node_status_t::HAS_CHILDREN; + search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); rounding_direction_t round_dir = child_selection(node_ptr); @@ -735,7 +751,7 @@ node_solve_info_t branch_and_bound_t::solve_node( } else { if (thread_type == thread_type_t::EXPLORATION) { - lower_bound_ceiling_.fetch_min(node_ptr->lower_bound); + fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " "to " @@ -764,17 +780,15 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod // to repair the heuristic solution. repair_heuristic_solutions(); - f_t lower_bound = node->lower_bound; - f_t upper_bound = get_upper_bound(); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = upper_bound - lower_bound; - i_t nodes_explored = (++exploration_stats_.nodes_explored); - i_t nodes_unexplored = (--exploration_stats_.nodes_unexplored); - exploration_stats_.nodes_since_last_log++; + f_t lower_bound = node->lower_bound; + f_t upper_bound = get_upper_bound(); + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t abs_gap = upper_bound - lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); search_tree->update(node, node_status_t::FATHOMED); + --exploration_stats_.nodes_unexplored; return; } @@ -786,15 +800,14 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod abs_gap < 10 * settings_.absolute_mip_gap_tol) && (time_since_last_log >= 1)) || (time_since_last_log > 30) || now > settings_.time_limit) { - // Check if no new node was explored until now. If this is the case, - // only the last thread should report the progress - if (exploration_stats_.nodes_explored.load() == nodes_explored) { - exploration_stats_.nodes_since_last_log = 0; - exploration_stats_.last_log = tic(); + bool should_report = should_report_.exchange(false); + if (should_report) { f_t obj = compute_user_objective(original_lp_, upper_bound); f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap_user = user_mip_gap(obj, user_lower); + i_t nodes_explored = exploration_stats_.nodes_explored; + i_t nodes_unexplored = exploration_stats_.nodes_unexplored; settings_.log.printf( " %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", @@ -806,6 +819,10 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0, gap_user.c_str(), now); + + exploration_stats_.nodes_since_last_log = 0; + exploration_stats_.last_log = tic(); + should_report_ = true; } } @@ -819,9 +836,17 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod std::vector row_sense; bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + const i_t m = leaf_problem.num_rows; + basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); + std::vector basic_list(m); + std::vector nonbasic_list; + node_solve_info_t status = solve_node(node, *search_tree, leaf_problem, + basis_factors, + basic_list, + nonbasic_list, node_presolver, thread_type_t::EXPLORATION, true, @@ -829,6 +854,10 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod original_lp_.upper, settings_.log); + ++exploration_stats_.nodes_since_last_log; + ++exploration_stats_.nodes_explored; + --exploration_stats_.nodes_unexplored; + if (status == node_solve_info_t::TIME_LIMIT) { solver_status_ = mip_exploration_status_t::TIME_LIMIT; return; @@ -859,9 +888,12 @@ void branch_and_bound_t::explore_subtree(i_t task_id, mip_node_t* start_node, search_tree_t& search_tree, lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver) + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list) { - bool recompute_bounds = true; + bool recompute_bounds_and_basis = true; std::deque*> stack; stack.push_front(start_node); @@ -884,14 +916,11 @@ void branch_and_bound_t::explore_subtree(i_t task_id, // - The lower bound of the parent is lower or equal to its children assert(task_id < local_lower_bounds_.size()); local_lower_bounds_[task_id] = lower_bound; - i_t nodes_explored = (++exploration_stats_.nodes_explored); - i_t nodes_unexplored = (--exploration_stats_.nodes_unexplored); - exploration_stats_.nodes_since_last_log++; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); search_tree.update(node_ptr, node_status_t::FATHOMED); - recompute_bounds = true; + --exploration_stats_.nodes_unexplored; continue; } @@ -908,6 +937,9 @@ void branch_and_bound_t::explore_subtree(i_t task_id, f_t obj = compute_user_objective(original_lp_, upper_bound); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string gap_user = user_mip_gap(obj, user_lower); + i_t nodes_explored = exploration_stats_.nodes_explored; + i_t nodes_unexplored = exploration_stats_.nodes_unexplored; + settings_.log.printf( " %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", nodes_explored, @@ -935,14 +967,21 @@ void branch_and_bound_t::explore_subtree(i_t task_id, node_solve_info_t status = solve_node(node_ptr, search_tree, leaf_problem, + basis_factors, + basic_list, + nonbasic_list, node_presolver, thread_type_t::EXPLORATION, - recompute_bounds, + recompute_bounds_and_basis, original_lp_.lower, original_lp_.upper, settings_.log); - recompute_bounds = !has_children(status); + recompute_bounds_and_basis = !has_children(status); + + ++exploration_stats_.nodes_since_last_log; + ++exploration_stats_.nodes_explored; + --exploration_stats_.nodes_unexplored; if (status == node_solve_info_t::TIME_LIMIT) { solver_status_ = mip_exploration_status_t::TIME_LIMIT; @@ -966,6 +1005,8 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (get_heap_size() > settings_.num_bfs_threads) { std::vector lower = original_lp_.lower; std::vector upper = original_lp_.upper; + std::fill( + node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); mutex_dive_queue_.lock(); @@ -1006,6 +1047,11 @@ void branch_and_bound_t::best_first_thread(i_t task_id, std::vector row_sense; bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + const i_t m = leaf_problem.num_rows; + basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); + std::vector basic_list(m); + std::vector nonbasic_list; + while (solver_status_ == mip_exploration_status_t::RUNNING && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && (active_subtrees_ > 0 || get_heap_size() > 0)) { @@ -1031,7 +1077,15 @@ void branch_and_bound_t::best_first_thread(i_t task_id, } // Best-first search with plunging - explore_subtree(task_id, start_node, search_tree, leaf_problem, node_presolver); + explore_subtree(task_id, + start_node, + search_tree, + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list); + active_subtrees_--; } @@ -1057,12 +1111,16 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A { logger_t log; log.log = false; - // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + const i_t m = leaf_problem.num_rows; + basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); + std::vector basic_list(m); + std::vector nonbasic_list; + while (solver_status_ == mip_exploration_status_t::RUNNING && (active_subtrees_ > 0 || get_heap_size() > 0)) { std::optional> start_node; @@ -1074,7 +1132,7 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A if (start_node.has_value()) { if (get_upper_bound() < start_node->node.lower_bound) { continue; } - bool recompute_bounds = true; + bool recompute_bounds_and_basis = true; search_tree_t subtree(std::move(start_node->node)); std::deque*> stack; stack.push_front(&subtree.root); @@ -1086,7 +1144,7 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds = true; + recompute_bounds_and_basis = true; continue; } @@ -1095,14 +1153,17 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A node_solve_info_t status = solve_node(node_ptr, subtree, leaf_problem, + basis_factors, + basic_list, + nonbasic_list, node_presolver, thread_type_t::DIVING, - recompute_bounds, + recompute_bounds_and_basis, start_node->lower, start_node->upper, log); - recompute_bounds = !has_children(status); + recompute_bounds_and_basis = !has_children(status); if (status == node_solve_info_t::TIME_LIMIT) { solver_status_ = mip_exploration_status_t::TIME_LIMIT; @@ -1123,17 +1184,18 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A // best first search, then we split the current subtree at the // lowest possible point and move to the queue, so it can // be picked by another thread. - if (diving_queue_.size() < min_diving_queue_size_) { + if (std::lock_guard lock(mutex_dive_queue_); + diving_queue_.size() < min_diving_queue_size_) { mip_node_t* new_node = stack.back(); stack.pop_back(); std::vector lower = start_node->lower; std::vector upper = start_node->upper; + std::fill( + node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); new_node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - mutex_dive_queue_.lock(); diving_queue_.emplace(new_node->detach_copy(), std::move(lower), std::move(upper)); - mutex_dive_queue_.unlock(); } } } @@ -1303,6 +1365,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut min_diving_queue_size_ = 4 * settings_.num_diving_threads; solver_status_ = mip_exploration_status_t::RUNNING; lower_bound_ceiling_ = inf; + should_report_ = true; #pragma omp parallel num_threads(settings_.num_threads) { diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 6ad5b3d81..215c533ce 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -127,7 +127,7 @@ class branch_and_bound_t { omp_atomic_t total_lp_iters = 0; // This should only be used by the main thread - f_t last_log = 0.0; + omp_atomic_t last_log = 0.0; omp_atomic_t nodes_since_last_log = 0; } exploration_stats_; @@ -159,6 +159,8 @@ class branch_and_bound_t { // Global status of the solver. omp_atomic_t solver_status_; + omp_atomic_t should_report_; + // In case, a best-first thread encounters a numerical issue when solving a node, // its blocks the progression of the lower bound. omp_atomic_t lower_bound_ceiling_; @@ -188,7 +190,10 @@ class branch_and_bound_t { mip_node_t* start_node, search_tree_t& search_tree, lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver); + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list); // Each "main" thread pops a node from the global heap and then performs a plunge // (i.e., a shallow dive) into the subtree determined by the node. @@ -204,9 +209,12 @@ class branch_and_bound_t { node_solve_info_t solve_node(mip_node_t* node_ptr, search_tree_t& search_tree, lp_problem_t& leaf_problem, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, thread_type_t thread_type, - bool recompute, + bool recompute_basis_and_bounds, const std::vector& root_lower, const std::vector& root_upper, logger_t& log); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 39ea9b465..4932ddae9 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2235,6 +2235,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2::bound_info(lp, settings); if (initialize_basis) { std::vector superbasic_list; + nonbasic_list.clear(); + nonbasic_list.reserve(n - m); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); diff --git a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu index 32866a87a..0a3c09bda 100644 --- a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu @@ -28,13 +28,13 @@ localized_duality_gap_container_t::localized_duality_gap_container_t( primal_solution_{static_cast(primal_size), handle_ptr->get_stream()}, // Needed even in kkt dual_solution_{static_cast(dual_size), handle_ptr->get_stream()}, // Needed even in kkt - primal_gradient_{is_KKT_restart() ? 0 : static_cast(primal_size), + primal_gradient_{!is_trust_region_restart() ? 0 : static_cast(primal_size), handle_ptr->get_stream()}, - dual_gradient_{is_KKT_restart() ? 0 : static_cast(dual_size), + dual_gradient_{!is_trust_region_restart() ? 0 : static_cast(dual_size), handle_ptr->get_stream()}, - primal_solution_tr_{is_KKT_restart() ? 0 : static_cast(primal_size), + primal_solution_tr_{!is_trust_region_restart() ? 0 : static_cast(primal_size), handle_ptr->get_stream()}, - dual_solution_tr_{is_KKT_restart() ? 0 : static_cast(dual_size), + dual_solution_tr_{!is_trust_region_restart() ? 0 : static_cast(dual_size), handle_ptr->get_stream()} { RAFT_CUDA_TRY(cudaMemsetAsync(primal_solution_.data(), diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 3ddd158cb..565377682 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -112,7 +112,9 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( primal_size_{primal_size, stream_view_}, dual_size_{dual_size, stream_view_}, primal_norm_weight_{stream_view_}, - weights_{(is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), + weights_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), stream_view_}, dual_norm_weight_{stream_view_}, restart_triggered_{0, stream_view_}, @@ -122,7 +124,7 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( last_restart_duality_gap_{handle_ptr_, primal_size, dual_size}, // If KKT restart, call the empty cusparse_view constructor avg_duality_gap_cusparse_view_{ - (is_KKT_restart()) + (!is_trust_region_restart()) ? cusparse_view_t(handle_ptr_, cusparse_view.A_, cusparse_view.A_indices_) : cusparse_view_t(handle_ptr_, op_problem, @@ -132,7 +134,7 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( avg_duality_gap_.primal_gradient_.data(), avg_duality_gap_.dual_gradient_.data())}, current_duality_gap_cusparse_view_{ - (is_KKT_restart()) + (!is_trust_region_restart()) ? cusparse_view_t(handle_ptr_, cusparse_view.A_, cusparse_view.A_indices_) : cusparse_view_t(handle_ptr_, op_problem, @@ -142,7 +144,7 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( current_duality_gap_.primal_gradient_.data(), current_duality_gap_.dual_gradient_.data())}, last_restart_duality_gap_cusparse_view_{ - (is_KKT_restart()) + (!is_trust_region_restart()) ? cusparse_view_t(handle_ptr_, cusparse_view.A_, cusparse_view.A_indices_) : cusparse_view_t(handle_ptr_, op_problem, @@ -153,35 +155,43 @@ pdlp_restart_strategy_t::pdlp_restart_strategy_t( last_restart_duality_gap_.dual_gradient_.data())}, gap_reduction_ratio_last_trial_{stream_view_}, last_restart_length_{0}, - // If KKT restart, don't need to init all of those - center_point_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - objective_vector_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - unsorted_direction_full_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - direction_full_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - threshold_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - lower_bound_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - upper_bound_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, - test_point_{ - (is_KKT_restart()) ? 0 : static_cast(primal_size_h_ + dual_size_h_), - stream_view_}, + // If trust region restart is not used, no need to init all of those + center_point_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + objective_vector_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + unsorted_direction_full_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + direction_full_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + threshold_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + lower_bound_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + upper_bound_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, + test_point_{(!is_trust_region_restart()) + ? 0 + : static_cast(primal_size_h_ + dual_size_h_), + stream_view_}, transformed_constraint_lower_bounds_{ - (is_KKT_restart()) ? 0 : static_cast(dual_size_h_), stream_view_}, + (!is_trust_region_restart()) ? 0 : static_cast(dual_size_h_), stream_view_}, transformed_constraint_upper_bounds_{ - (is_KKT_restart()) ? 0 : static_cast(dual_size_h_), stream_view_}, + (!is_trust_region_restart()) ? 0 : static_cast(dual_size_h_), stream_view_}, shared_live_kernel_accumulator_{0, stream_view_}, target_threshold_{stream_view_}, low_radius_squared_{stream_view_}, @@ -847,7 +857,7 @@ bool pdlp_restart_strategy_t::compute_restart( { raft::common::nvtx::range fun_scope("compute_restart"); - if (is_KKT_restart()) { + if (pdlp_hyper_params::restart_strategy == static_cast(restart_strategy_t::KKT_RESTART)) { return run_kkt_restart(pdhg_solver, primal_solution_avg, dual_solution_avg, diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh index 313e92647..276130e9a 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh @@ -346,12 +346,11 @@ class pdlp_restart_strategy_t { }; template -bool is_KKT_restart() +bool is_trust_region_restart() { return pdlp_hyper_params::restart_strategy == - static_cast(pdlp_restart_strategy_t::restart_strategy_t::KKT_RESTART) || - pdlp_hyper_params::restart_strategy == - static_cast(pdlp_restart_strategy_t::restart_strategy_t::CUPDLPX_RESTART); + static_cast( + pdlp_restart_strategy_t::restart_strategy_t::TRUST_REGION_RESTART); } } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 78f2b9fa0..4372819d1 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -430,8 +430,7 @@ void diversity_manager_t::diversity_step(i_t max_iterations_without_im improved = false; while (k-- > 0) { if (check_b_b_preemption()) { return; } - auto new_sol_vector = population.get_external_solutions(); - recombine_and_ls_with_all(new_sol_vector); + population.add_external_solutions_to_population(); population.adjust_weights_according_to_best_feasible(); cuopt_assert(population.test_invariant(), ""); if (population.current_size() < 2) { @@ -584,7 +583,15 @@ diversity_manager_t::recombine_and_local_search(solution_t& ls_config_t ls_config; ls_config.best_objective_of_parents = best_objective_of_parents; ls_config.at_least_one_parent_feasible = at_least_one_parent_feasible; - success = this->run_local_search(offspring, population.weights, timer, ls_config); + offspring.swap_problem_pointers(); + population.weights_with_cuts.cstr_weights.resize(offspring.problem_ptr->n_constraints, + offspring.handle_ptr->get_stream()); + raft::copy(population.weights_with_cuts.cstr_weights.data(), + population.weights.cstr_weights.data(), + population.weights.cstr_weights.size(), + offspring.handle_ptr->get_stream()); + success = this->run_local_search(offspring, population.weights_with_cuts, timer, ls_config); + offspring.swap_problem_pointers(); if (!success) { // add the attempt mab_recombiner.add_mab_reward(mab_recombiner.last_chosen_option, diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index f8e40231a..e18660114 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -24,7 +24,6 @@ constexpr double weight_decrease_ratio = 0.9; constexpr double max_infeasibility_weight = 1e12; constexpr double min_infeasibility_weight = 1.; constexpr double infeasibility_balance_ratio = 1.1; -constexpr double halving_skip_ratio = 0.75; template population_t::population_t(std::string const& name_, @@ -41,8 +40,8 @@ population_t::population_t(std::string const& name_, max_solutions(max_solutions_), infeasibility_importance(infeasibility_weight_), weights(0, context.problem_ptr->handle_ptr), + weights_with_cuts(0, context.problem_ptr->handle_ptr), rng(cuopt::seed_generator::get_seed()), - early_exit_primal_generation(false), population_hash_map(*problem_ptr), timer(0) { @@ -64,6 +63,14 @@ i_t get_max_var_threshold(i_t n_vars) return n_vars - 10; } +template +void population_t::apply_problem_ptr_to_all_solutions() +{ + for (size_t i = 0; i < indices.size(); i++) { + solutions[indices[i].first].second.problem_with_cuts_ptr = problem_ptr_with_cuts; + } +} + template void population_t::allocate_solutions() { @@ -73,6 +80,20 @@ void population_t::allocate_solutions() } } +template +void population_t::set_problem_ptr_with_cuts(problem_t* problem_ptr_with_cuts) +{ + constexpr f_t ten = 10.; + this->problem_ptr_with_cuts = problem_ptr_with_cuts; + weights_with_cuts.cstr_weights.resize(problem_ptr_with_cuts->n_constraints, + problem_ptr_with_cuts->handle_ptr->get_stream()); + // fill last element with default + thrust::uninitialized_fill(problem_ptr_with_cuts->handle_ptr->get_thrust_policy(), + weights_with_cuts.cstr_weights.begin() + problem_ptr->n_constraints, + weights_with_cuts.cstr_weights.end(), + ten); +} + template void population_t::initialize_population() { @@ -87,6 +108,12 @@ void population_t::initialize_population() weights.cstr_weights.begin(), weights.cstr_weights.end(), ten); + weights_with_cuts.cstr_weights.resize(problem_ptr->n_constraints, + problem_ptr->handle_ptr->get_stream()); + thrust::uninitialized_fill(problem_ptr->handle_ptr->get_thrust_policy(), + weights_with_cuts.cstr_weights.begin(), + weights_with_cuts.cstr_weights.end(), + ten); } template @@ -109,12 +136,11 @@ std::pair, solution_t> population_t::ge auto second_solution = solutions[indices[j].first].second; // if best feasible and best are the same, take the second index instead of best if (i == 0 && j == 1) { - bool same = - check_integer_equal_on_indices(first_solution.problem_ptr->integer_indices, - first_solution.assignment, - second_solution.assignment, - first_solution.problem_ptr->tolerances.integrality_tolerance, - first_solution.handle_ptr); + bool same = check_integer_equal_on_indices(problem_ptr->integer_indices, + first_solution.assignment, + second_solution.assignment, + problem_ptr->tolerances.integrality_tolerance, + first_solution.handle_ptr); if (same) { auto new_sol = solutions[indices[2].first].second; second_solution = std::move(new_sol); @@ -172,7 +198,6 @@ void population_t::add_external_solution(const std::vector& solut CUOPT_LOG_DEBUG("Found new best solution %g in external queue", problem_ptr->get_user_obj_from_solver_obj(objective)); } - if (external_solution_queue.size() >= 5) { early_exit_primal_generation = true; } solutions_in_external_queue_ = true; } @@ -184,6 +209,7 @@ void population_t::add_external_solutions_to_population() auto new_sol_vector = get_external_solutions(); add_solutions_from_vec(std::move(new_sol_vector)); + apply_problem_ptr_to_all_solutions(); } // normally we would need a lock here but these are boolean types and race conditions are not @@ -191,8 +217,7 @@ void population_t::add_external_solutions_to_population() template void population_t::preempt_heuristic_solver() { - preempt_heuristic_solver_ = true; - early_exit_primal_generation = true; + preempt_heuristic_solver_ = true; } template @@ -282,7 +307,7 @@ void population_t::run_solution_callbacks(solution_t& sol) if (context.settings.mip_scaling) { context.scaling.unscale_solutions(temp_sol.assignment, dummy); // Need to get unscaled problem as well - problem_t n_problem(*sol.problem_ptr->original_problem_ptr); + problem_t n_problem(*problem_ptr->original_problem_ptr); temp_sol.problem_ptr = &n_problem; temp_sol.resize_to_original_problem(); temp_sol.compute_feasibility(); @@ -667,42 +692,6 @@ std::vector> population_t::population_to_vector() return sol_vec; } -template -void population_t::halve_the_population() -{ - raft::common::nvtx::range fun_scope("halve_the_population"); - // try 3/4 here - if (current_size() <= (max_solutions * halving_skip_ratio)) { return; } - CUOPT_LOG_DEBUG("Halving the population, current size: %lu", current_size()); - // put population into a vector - auto sol_vec = population_to_vector(); - i_t counter = 0; - constexpr i_t max_adjustments = 4; - size_t max_var_threshold = get_max_var_threshold(problem_ptr->n_integer_vars); - - std::lock_guard lock(write_mutex); - while (current_size() > max_solutions / 2) { - clear_except_best_feasible(); - var_threshold = std::max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars); - for (auto& sol : sol_vec) { - add_solution(solution_t(sol)); - } - if (counter++ > max_adjustments) break; - } - counter = 0; - // if we removed too many decrease the diversity a little - while (current_size() < max_solutions / 4) { - clear_except_best_feasible(); - var_threshold = std::min( - max_var_threshold, - std::min((size_t)(var_threshold * 1.02), (size_t)(0.995 * problem_ptr->n_integer_vars))); - for (auto& sol : sol_vec) { - add_solution(solution_t(sol)); - } - if (counter++ > max_adjustments) break; - } -} - template size_t population_t::find_free_solution_index() { diff --git a/cpp/src/mip/diversity/population.cuh b/cpp/src/mip/diversity/population.cuh index 41a2f05b7..b2c6fb4cd 100644 --- a/cpp/src/mip/diversity/population.cuh +++ b/cpp/src/mip/diversity/population.cuh @@ -151,7 +151,6 @@ class population_t { void find_diversity(std::vector>& initial_sol_vector, bool avg); std::vector> population_to_vector(); - void halve_the_population(); void run_solution_callbacks(solution_t& sol); @@ -161,6 +160,9 @@ class population_t { void diversity_step(i_t max_iterations_without_improvement); + void set_problem_ptr_with_cuts(problem_t* problem_ptr_with_cuts); + void apply_problem_ptr_to_all_solutions(); + // does some consistency tests bool test_invariant(); @@ -169,6 +171,7 @@ class population_t { std::string name; mip_solver_context_t& context; problem_t* problem_ptr; + problem_t* problem_ptr_with_cuts; diversity_manager_t& dm; i_t var_threshold; i_t initial_threshold; @@ -178,6 +181,7 @@ class population_t { f_t infeasibility_importance = 100.; size_t max_solutions; weight_t weights; + weight_t weights_with_cuts; std::vector> indices; std::vector>> solutions; @@ -202,10 +206,9 @@ class population_t { i_t update_iter = 0; std::recursive_mutex write_mutex; std::mutex solution_mutex; - std::atomic early_exit_primal_generation = false; std::atomic preempt_heuristic_solver_ = false; - std::atomic solutions_in_external_queue_ = false; f_t best_feasible_objective = std::numeric_limits::max(); + std::atomic solutions_in_external_queue_ = false; assignment_hash_map_t population_hash_map; cuopt::timer_t timer; }; diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh index 6b9a26554..dc0509054 100644 --- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh @@ -139,6 +139,7 @@ class bound_prop_recombiner_t : public recombiner_t { auto& other_solution = a.get_feasible() ? b : a; // copy the solution from guiding solution_t offspring(guiding_solution); + offspring.swap_problem_pointers(); // find same values and populate it to offspring i_t n_different_vars = this->assign_same_integer_values(a, b, offspring); CUOPT_LOG_DEBUG("BP rec: Number of different variables %d MAX_VARS %d", @@ -180,8 +181,9 @@ class bound_prop_recombiner_t : public recombiner_t { rmm::device_uvector old_assignment(offspring.assignment, offspring.handle_ptr->get_stream()); offspring.handle_ptr->sync_stream(); - offspring.assignment = std::move(fixed_assignment); - offspring.problem_ptr = &fixed_problem; + offspring.assignment = std::move(fixed_assignment); + problem_t* orig_problem_ptr = offspring.problem_ptr; + offspring.problem_ptr = &fixed_problem; cuopt_func_call(offspring.test_variable_bounds(false)); get_probing_values_for_feasible(guiding_solution, other_solution, @@ -198,7 +200,7 @@ class bound_prop_recombiner_t : public recombiner_t { constraint_prop.single_rounding_only = false; cuopt_func_call(bool feasible_after_bounds_prop = offspring.get_feasible()); offspring.handle_ptr->sync_stream(); - offspring.problem_ptr = a.problem_ptr; + offspring.problem_ptr = orig_problem_ptr; fixed_assignment = std::move(offspring.assignment); offspring.assignment = std::move(old_assignment); offspring.handle_ptr->sync_stream(); @@ -216,6 +218,7 @@ class bound_prop_recombiner_t : public recombiner_t { } constraint_prop.max_n_failed_repair_iterations = 1; cuopt_func_call(offspring.test_number_all_integer()); + offspring.swap_problem_pointers(); bool better_cost_than_parents = offspring.get_quality(weights) < std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights)); diff --git a/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh b/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh index 1daaf3e51..d64a53145 100644 --- a/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/fp_recombiner.cuh @@ -44,6 +44,7 @@ class fp_recombiner_t : public recombiner_t { auto& other_solution = a.get_feasible() ? b : a; // copy the solution from A solution_t offspring(guiding_solution); + offspring.swap_problem_pointers(); // find same values and populate it to offspring i_t n_different_vars = this->assign_same_integer_values(guiding_solution, other_solution, offspring); @@ -126,6 +127,7 @@ class fp_recombiner_t : public recombiner_t { fp_recombiner_config_t::decrease_max_n_of_vars_from_other(); } } + offspring.swap_problem_pointers(); bool better_cost_than_parents = offspring.get_quality(weights) < std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights)); diff --git a/cpp/src/mip/diversity/recombiners/line_segment_recombiner.cuh b/cpp/src/mip/diversity/recombiners/line_segment_recombiner.cuh index ca2e4422b..fd3592edc 100644 --- a/cpp/src/mip/diversity/recombiners/line_segment_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/line_segment_recombiner.cuh @@ -75,6 +75,7 @@ class line_segment_recombiner_t : public recombiner_t { auto& other_solution = a.get_feasible() ? b : a; // copy the solution from A solution_t offspring(guiding_solution); + offspring.swap_problem_pointers(); timer_t line_segment_timer{ls_recombiner_config_t::time_limit}; // TODO after we have the conic combination, detect the lambda change // (i.e. the integral variables flip on line segment) @@ -99,6 +100,7 @@ class line_segment_recombiner_t : public recombiner_t { is_feasibility_run, line_segment_timer); line_segment_search.settings = {}; + offspring.swap_problem_pointers(); bool better_cost_than_parents = offspring.get_quality(weights) < std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights)); diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 62fb52fe1..785205ce2 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -46,6 +46,7 @@ class sub_mip_recombiner_t : public recombiner_t { auto& other_solution = a.get_feasible() ? b : a; // copy the solution from A solution_t offspring(guiding_solution); + offspring.swap_problem_pointers(); // find same values and populate it to offspring i_t n_different_vars = this->assign_same_integer_values(guiding_solution, other_solution, offspring); @@ -146,7 +147,6 @@ class sub_mip_recombiner_t : public recombiner_t { } cuopt_func_call(offspring.test_variable_bounds()); cuopt_assert(offspring.test_number_all_integer(), "All must be integers after offspring"); - offspring.compute_feasibility(); // bool same_as_parents = this->check_if_offspring_is_same_as_parents(offspring, a, b); // adjust the max_n_of_vars_from_other if (n_different_vars > (i_t)sub_mip_recombiner_config_t::max_n_of_vars_from_other) { @@ -171,10 +171,15 @@ class sub_mip_recombiner_t : public recombiner_t { rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); scaling.unscale_solutions(fixed_assignment, dummy); sol.unfix_variables(fixed_assignment, variable_map); - sol.compute_feasibility(); + // the current problem is the proble with objective cut + // to add to the population, swap problem to original + cuopt_assert(sol.compute_feasibility(), "Solution must be feasible"); + sol.swap_problem_pointers(); + cuopt_assert(sol.get_feasible(), "Solution must be feasible"); cuopt_func_call(sol.test_variable_bounds()); population.add_solution(std::move(sol)); } + offspring.swap_problem_pointers(); bool better_cost_than_parents = offspring.get_quality(weights) < std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights)); diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index a8e06440a..dd2637a4a 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -32,15 +32,9 @@ local_search_t::local_search_t(mip_solver_context_t& context fj_sol_on_lp_opt(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), fj(context), - // fj_tree(fj), constraint_prop(context), line_segment_search(fj, constraint_prop), - fp(context, - fj, - // fj_tree, - constraint_prop, - line_segment_search, - lp_optimal_solution_), + fp(context, fj, constraint_prop, line_segment_search, lp_optimal_solution_), rng(cuopt::seed_generator::get_seed()), problem_with_objective_cut(*context.problem_ptr, context.problem_ptr->handle_ptr) { @@ -150,7 +144,6 @@ bool local_search_t::do_fj_solve(solution_t& solution, if (time_limit == 0.) return solution.get_feasible(); timer_t timer(time_limit); - auto h_weights = cuopt::host_copy(in_fj.cstr_weights, solution.handle_ptr->get_stream()); auto h_objective_weight = in_fj.objective_weight.value(solution.handle_ptr->get_stream()); for (auto& cpu_fj : ls_cpu_fj) { @@ -158,8 +151,6 @@ bool local_search_t::do_fj_solve(solution_t& solution, solution, h_weights, h_weights, h_objective_weight, fj_settings_t{}, true); } - auto solution_copy = solution; - // Start CPU solver in background thread for (auto& cpu_fj : ls_cpu_fj) { cpu_fj.start_cpu_solver(); @@ -178,7 +169,7 @@ bool local_search_t::do_fj_solve(solution_t& solution, auto gpu_fj_end = std::chrono::high_resolution_clock::now(); double gpu_fj_duration = std::chrono::duration(gpu_fj_end - gpu_fj_start).count(); - solution_t solution_cpu(*solution.problem_ptr); + solution_t solution_cpu(solution); f_t best_cpu_obj = std::numeric_limits::max(); // // Wait for CPU solver to finish @@ -263,18 +254,8 @@ bool local_search_t::run_local_search(solution_t& solution, raft::common::nvtx::range fun_scope("local search"); fj_settings_t fj_settings; if (timer.check_time_limit()) return false; - // adjust these time limits - if (!solution.get_feasible()) { - if (ls_config.at_least_one_parent_feasible) { - fj_settings.time_limit = 0.5; - timer = timer_t(fj_settings.time_limit); - } else { - fj_settings.time_limit = 0.25; - timer = timer_t(fj_settings.time_limit); - } - } else { - fj_settings.time_limit = std::min(1., timer.remaining_time()); - } + fj_settings.time_limit = std::min(1., timer.remaining_time()); + timer = timer_t(fj_settings.time_limit); fj_settings.update_weights = false; fj_settings.feasibility_run = false; fj.set_fj_settings(fj_settings); @@ -522,15 +503,15 @@ void local_search_t::resize_vectors(problem_t& problem, template void local_search_t::save_solution_and_add_cutting_plane( - solution_t& solution, rmm::device_uvector& best_solution, f_t& best_objective) + solution_t& best_in_population, solution_t& solution, f_t& best_objective) { raft::common::nvtx::range fun_scope("save_solution_and_add_cutting_plane"); - if (solution.get_objective() < best_objective) { - raft::copy(best_solution.data(), - solution.assignment.data(), + if (best_in_population.get_objective() < best_objective) { + raft::copy(solution.assignment.data(), + best_in_population.assignment.data(), solution.assignment.size(), solution.handle_ptr->get_stream()); - best_objective = solution.get_objective(); + best_objective = best_in_population.get_objective(); f_t objective_cut = best_objective - std::max(std::abs(0.001 * best_objective), OBJECTIVE_EPSILON); problem_with_objective_cut.add_cutting_plane_at_objective(objective_cut); @@ -541,9 +522,6 @@ template void local_search_t::resize_to_new_problem() { resize_vectors(problem_with_objective_cut, problem_with_objective_cut.handle_ptr); - // hint for next PR in case load balanced is reintroduced - // lb_constraint_prop.temp_problem.setup(problem_with_objective_cut); - // lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); constraint_prop.bounds_update.resize(problem_with_objective_cut); } @@ -551,75 +529,88 @@ template void local_search_t::resize_to_old_problem(problem_t* old_problem_ptr) { resize_vectors(*old_problem_ptr, old_problem_ptr->handle_ptr); - // hint for next PR in case load balanced is reintroduced - // lb_constraint_prop.temp_problem.setup(*old_problem_ptr); - // lb_constraint_prop.bounds_update.setup(lb_constraint_prop.temp_problem); constraint_prop.bounds_update.resize(*old_problem_ptr); } template -void local_search_t::reset_alpha_and_save_solution( - solution_t& solution, - problem_t* old_problem_ptr, - population_t* population_ptr, - i_t i, - i_t last_improved_iteration, - rmm::device_uvector& best_solution, - f_t& best_objective) +void local_search_t::handle_cutting_plane_and_weights( + solution_t& solution, population_t* population_ptr, f_t& best_objective) { - raft::common::nvtx::range fun_scope("reset_alpha_and_save_solution"); - fp.config.alpha = default_alpha; - solution_t solution_copy(solution); - solution_copy.problem_ptr = old_problem_ptr; - solution_copy.resize_to_problem(); - population_ptr->add_solution(std::move(solution_copy)); - population_ptr->add_external_solutions_to_population(); if (!cutting_plane_added_for_active_run) { - solution.problem_ptr = &problem_with_objective_cut; - solution.resize_to_problem(); + population_ptr->set_problem_ptr_with_cuts(&problem_with_objective_cut); + constexpr bool is_cuts_problem = true; + solution.set_problem_ptr(&problem_with_objective_cut, is_cuts_problem); resize_to_new_problem(); cutting_plane_added_for_active_run = true; raft::copy(population_ptr->weights.cstr_weights.data(), fj.cstr_weights.data(), population_ptr->weights.cstr_weights.size(), solution.handle_ptr->get_stream()); + raft::copy(population_ptr->weights_with_cuts.cstr_weights.data(), + fj.cstr_weights.data(), + fj.cstr_weights.size(), + solution.handle_ptr->get_stream()); } population_ptr->update_weights(); - save_solution_and_add_cutting_plane( - population_ptr->best_feasible(), best_solution, best_objective); - raft::copy(solution.assignment.data(), - best_solution.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); + save_solution_and_add_cutting_plane(population_ptr->best_feasible(), solution, best_objective); + population_ptr->apply_problem_ptr_to_all_solutions(); +} + +template +void local_search_t::reset_alpha_and_save_solution(solution_t& solution, + problem_t* old_problem_ptr, + population_t* population_ptr, + i_t i, + i_t last_improved_iteration, + f_t& best_objective) +{ + raft::common::nvtx::range fun_scope("reset_alpha_and_save_solution"); + fp.reset(); + solution_t solution_copy(solution); + solution_copy.set_problem_ptr(old_problem_ptr); + population_ptr->add_solution(std::move(solution_copy)); + population_ptr->add_external_solutions_to_population(); + handle_cutting_plane_and_weights(solution, population_ptr, best_objective); population_ptr->print(); } template -void local_search_t::reset_alpha_and_run_recombiners( - solution_t& solution, - problem_t* old_problem_ptr, - population_t* population_ptr, - i_t i, - i_t last_improved_iteration, - rmm::device_uvector& best_solution, - f_t& best_objective) +void local_search_t::run_recombiners(solution_t& solution, + problem_t* old_problem_ptr, + population_t* population_ptr, + i_t i, + i_t last_improved_iteration, + f_t& best_objective) { raft::common::nvtx::range fun_scope("reset_alpha_and_run_recombiners"); constexpr i_t iterations_for_stagnation = 3; constexpr i_t max_iterations_without_improvement = 8; population_ptr->add_external_solutions_to_population(); + if (population_ptr->is_feasible()) { + if (!cutting_plane_added_for_active_run) { + solution.set_problem_ptr(&problem_with_objective_cut); + population_ptr->set_problem_ptr_with_cuts(&problem_with_objective_cut); + resize_to_new_problem(); + cutting_plane_added_for_active_run = true; + raft::copy(population_ptr->weights.cstr_weights.data(), + fj.cstr_weights.data(), + population_ptr->weights.cstr_weights.size(), + solution.handle_ptr->get_stream()); + raft::copy(population_ptr->weights_with_cuts.cstr_weights.data(), + fj.cstr_weights.data(), + fj.cstr_weights.size(), + solution.handle_ptr->get_stream()); + } + population_ptr->update_weights(); + save_solution_and_add_cutting_plane(population_ptr->best_feasible(), solution, best_objective); + } if (population_ptr->current_size() > 1 && i - last_improved_iteration > iterations_for_stagnation) { - fp.config.alpha = default_alpha; + population_ptr->apply_problem_ptr_to_all_solutions(); population_ptr->diversity_step(max_iterations_without_improvement); population_ptr->print(); population_ptr->update_weights(); - save_solution_and_add_cutting_plane( - population_ptr->best_feasible(), best_solution, best_objective); - raft::copy(solution.assignment.data(), - best_solution.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); + save_solution_and_add_cutting_plane(population_ptr->best_feasible(), solution, best_objective); } } @@ -635,7 +626,6 @@ bool local_search_t::run_fp(solution_t& solution, cutting_plane_added_for_active_run = is_feasible; double best_objective = is_feasible ? solution.get_objective() : std::numeric_limits::max(); - rmm::device_uvector best_solution(solution.assignment, solution.handle_ptr->get_stream()); problem_t* old_problem_ptr = solution.problem_ptr; fp.timer = timer_t(timer.remaining_time()); // if it has not been initialized yet, create a new problem and move it to the cut problem @@ -650,8 +640,9 @@ bool local_search_t::run_fp(solution_t& solution, // Do the copy here for proper handling of the added constraints weight fj.copy_weights( population_ptr->weights, solution.handle_ptr, problem_with_objective_cut.n_constraints); - solution.problem_ptr = &problem_with_objective_cut; - solution.resize_to_problem(); + solution.set_problem_ptr(&problem_with_objective_cut); + population_ptr->set_problem_ptr_with_cuts(&problem_with_objective_cut); + population_ptr->apply_problem_ptr_to_all_solutions(); resize_to_new_problem(); } i_t last_improved_iteration = 0; @@ -676,13 +667,8 @@ bool local_search_t::run_fp(solution_t& solution, if (is_feasible) { CUOPT_LOG_DEBUG("Found feasible in FP with obj %f. Continue with FJ!", solution.get_objective()); - reset_alpha_and_save_solution(solution, - old_problem_ptr, - population_ptr, - i, - last_improved_iteration, - best_solution, - best_objective); + reset_alpha_and_save_solution( + solution, old_problem_ptr, population_ptr, i, last_improved_iteration, best_objective); last_improved_iteration = i; } // if not feasible, it means it is a cycle @@ -700,31 +686,16 @@ bool local_search_t::run_fp(solution_t& solution, if (is_feasible) { CUOPT_LOG_DEBUG("Found feasible during restart with obj %f. Continue with FJ!", solution.get_objective()); - reset_alpha_and_save_solution(solution, - old_problem_ptr, - population_ptr, - i, - last_improved_iteration, - best_solution, - best_objective); + reset_alpha_and_save_solution( + solution, old_problem_ptr, population_ptr, i, last_improved_iteration, best_objective); last_improved_iteration = i; } else { - reset_alpha_and_run_recombiners(solution, - old_problem_ptr, - population_ptr, - i, - last_improved_iteration, - best_solution, - best_objective); + run_recombiners( + solution, old_problem_ptr, population_ptr, i, last_improved_iteration, best_objective); } } } - raft::copy(solution.assignment.data(), - best_solution.data(), - solution.assignment.size(), - solution.handle_ptr->get_stream()); - solution.problem_ptr = old_problem_ptr; - solution.resize_to_problem(); + solution.set_problem_ptr(old_problem_ptr); resize_to_old_problem(old_problem_ptr); solution.handle_ptr->sync_stream(); return is_feasible; diff --git a/cpp/src/mip/local_search/local_search.cuh b/cpp/src/mip/local_search/local_search.cuh index 6fdf4ac72..656076534 100644 --- a/cpp/src/mip/local_search/local_search.cuh +++ b/cpp/src/mip/local_search/local_search.cuh @@ -87,26 +87,28 @@ class local_search_t { const std::string& source); i_t ls_threads() const { return ls_cpu_fj.size() + scratch_cpu_fj.size(); } - void save_solution_and_add_cutting_plane(solution_t& solution, - rmm::device_uvector& best_solution, + void save_solution_and_add_cutting_plane(solution_t& best_in_population, + solution_t& solution, f_t& best_objective); void resize_to_new_problem(); void resize_to_old_problem(problem_t* old_problem_ptr); - void reset_alpha_and_run_recombiners(solution_t& solution, - problem_t* old_problem_ptr, - population_t* population_ptr, - i_t i, - i_t last_unimproved_iteration, - rmm::device_uvector& best_solution, - f_t& best_objective); + void run_recombiners(solution_t& solution, + problem_t* old_problem_ptr, + population_t* population_ptr, + i_t i, + i_t last_unimproved_iteration, + f_t& best_objective); void reset_alpha_and_save_solution(solution_t& solution, problem_t* old_problem_ptr, population_t* population_ptr, i_t i, i_t last_unimproved_iteration, - rmm::device_uvector& best_solution, f_t& best_objective); + void handle_cutting_plane_and_weights(solution_t& solution, + population_t* population_ptr, + f_t& best_objective); + mip_solver_context_t& context; rmm::device_uvector& lp_optimal_solution; bool lp_optimal_exists{false}; diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index c693bf5dd..bc9d5b145 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -42,6 +42,8 @@ rmm::device_uvector get_lower_bounds( template solution_t::solution_t(problem_t& problem_) : problem_ptr(&problem_), + problem_with_cuts_ptr(&problem_), + current_problem_is_cut(false), handle_ptr(problem_.handle_ptr), assignment(std::move(get_lower_bounds(problem_.variable_bounds, handle_ptr))), lower_excess(problem_.n_constraints, handle_ptr->get_stream()), @@ -59,6 +61,8 @@ solution_t::solution_t(problem_t& problem_) template solution_t::solution_t(const solution_t& other) : problem_ptr(other.problem_ptr), + problem_with_cuts_ptr(other.problem_with_cuts_ptr), + current_problem_is_cut(other.current_problem_is_cut), handle_ptr(other.handle_ptr), assignment(other.assignment, handle_ptr->get_stream()), lower_excess(other.lower_excess, handle_ptr->get_stream()), @@ -85,11 +89,13 @@ template void solution_t::copy_from(const solution_t& other_sol) { // TODO handle resize - problem_ptr = other_sol.problem_ptr; - handle_ptr = other_sol.handle_ptr; - h_obj = other_sol.h_obj; - h_user_obj = other_sol.h_user_obj; - h_infeasibility_cost = other_sol.h_infeasibility_cost; + problem_ptr = other_sol.problem_ptr; + problem_with_cuts_ptr = other_sol.problem_with_cuts_ptr; + current_problem_is_cut = other_sol.current_problem_is_cut; + handle_ptr = other_sol.handle_ptr; + h_obj = other_sol.h_obj; + h_user_obj = other_sol.h_user_obj; + h_infeasibility_cost = other_sol.h_infeasibility_cost; expand_device_copy(assignment, other_sol.assignment, handle_ptr->get_stream()); expand_device_copy(lower_excess, other_sol.lower_excess, handle_ptr->get_stream()); expand_device_copy(upper_excess, other_sol.upper_excess, handle_ptr->get_stream()); @@ -123,6 +129,39 @@ void solution_t::resize_to_problem() lp_state.prev_dual.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); } +template +void solution_t::swap_problem_pointers() +{ + current_problem_is_cut = !current_problem_is_cut; + cuopt_assert(problem_with_cuts_ptr != nullptr, "Problem with cuts pointer must be set"); + cuopt_assert(problem_with_cuts_ptr != problem_ptr, "Problem pointers must be different"); + std::swap(problem_ptr, problem_with_cuts_ptr); + resize_to_problem(); + compute_feasibility(); +} + +template +void solution_t::set_problem_ptr(problem_t* _problem_ptr, bool is_cuts_problem) +{ + cuopt_assert(_problem_ptr != nullptr, "Problem pointer must be set"); + // FIXME: when we create ping_pong problems with constant cuts, adjust this logic + if (is_cuts_problem) { + cuopt_assert(!current_problem_is_cut, "Current problem must not be with cuts"); + // set original problem to be swap pointer + problem_with_cuts_ptr = problem_ptr; + // set current problem to new cut problem + problem_ptr = _problem_ptr; + current_problem_is_cut = true; + } else { + problem_ptr = _problem_ptr; + // if we are setting non-cut problem, the cut version should be invalidated + problem_with_cuts_ptr = nullptr; + current_problem_is_cut = false; + } + resize_to_problem(); + compute_feasibility(); +} + template void solution_t::resize_to_original_problem() { diff --git a/cpp/src/mip/solution/solution.cuh b/cpp/src/mip/solution/solution.cuh index 7a525be58..4359c879c 100644 --- a/cpp/src/mip/solution/solution.cuh +++ b/cpp/src/mip/solution/solution.cuh @@ -98,6 +98,8 @@ class solution_t { f_t compute_max_constraint_violation(); f_t compute_max_int_violation(); f_t compute_max_variable_violation(); + void swap_problem_pointers(); + void set_problem_ptr(problem_t* _problem_ptr, bool is_cuts_problem = false); struct view_t { // let's not bloat the class for every simple getter and setters @@ -123,6 +125,8 @@ class solution_t { // we might need to change it later as we tighten the bounds // and run lp on fixed parts problem_t* problem_ptr; + problem_t* problem_with_cuts_ptr; + bool current_problem_is_cut = false; const raft::handle_t* handle_ptr; rmm::device_uvector assignment; rmm::device_uvector lower_excess; diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 1ec95583a..33eda66cb 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -91,38 +91,46 @@ class omp_atomic_t { T fetch_sub(T inc) { return fetch_add(-inc); } + private: + T val; + +#ifndef __NVCC__ + friend double fetch_min(omp_atomic_t& atomic_var, double other); + friend double fetch_max(omp_atomic_t& atomic_var, double other); +#endif +}; + // Atomic CAS are only supported in OpenMP v5.1 // (gcc 12+ or clang 14+), however, nvcc (or the host compiler) cannot // parse it correctly yet #ifndef __NVCC__ - T fetch_min(T other) - { - T old; +// Free non-template functions are necessary because of a clang 20 bug +// when omp atomic compare is used within a templated context. +// see https://github.com/llvm/llvm-project/issues/127466 +inline double fetch_min(omp_atomic_t& atomic_var, double other) +{ + double old; #pragma omp atomic compare capture - { - old = val; - val = other < val ? other : val; - } - return old; + { + old = atomic_var.val; + if (other < atomic_var.val) { atomic_var.val = other; } } + return old; +} - T fetch_max(T other) - { - T old; +inline double fetch_max(omp_atomic_t& atomic_var, double other) +{ + double old; #pragma omp atomic compare capture - { - old = val; - val = other > val ? other : val; - } - return old; + { + old = atomic_var.val; + if (other > atomic_var.val) { atomic_var.val = other; } } + return old; +} #endif - private: - T val; -}; - #endif } // namespace cuopt