diff --git a/cpp/include/cuopt/routing/cython/cython.hpp b/cpp/include/cuopt/routing/cython/cython.hpp index 75898ece7..93fdedc78 100644 --- a/cpp/include/cuopt/routing/cython/cython.hpp +++ b/cpp/include/cuopt/routing/cython/cython.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -16,6 +16,7 @@ #include #include +#include namespace cuopt { namespace cython { @@ -82,6 +83,10 @@ struct dataset_ret_t { std::unique_ptr call_solve(routing::data_model_view_t*, routing::solver_settings_t*); +// Wrapper for batch solve to expose the API to cython. +std::vector> call_batch_solve( + std::vector*>, routing::solver_settings_t*); + // Wrapper for dataset to expose the API to cython. std::unique_ptr call_generate_dataset( raft::handle_t const& handle, routing::generator::dataset_params_t const& params); diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index 72d75cdc7..3b4cdc2f8 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -337,7 +337,7 @@ i_t optimization_problem_t::get_n_integers() const { i_t n_integers = 0; if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); + auto enum_variable_types = cuopt::host_copy(get_variable_types(), handle_ptr_->get_stream()); for (size_t i = 0; i < enum_variable_types.size(); ++i) { if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; } @@ -591,16 +591,17 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ data_model_view.set_maximize(get_sense()); // Copy to host - auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices()); - auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets()); - auto constraint_bounds = cuopt::host_copy(get_constraint_bounds()); - auto objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); - auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); - auto row_types = cuopt::host_copy(get_row_types()); + auto stream = handle_ptr_->get_stream(); + auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values(), stream); + auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices(), stream); + auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets(), stream); + auto constraint_bounds = cuopt::host_copy(get_constraint_bounds(), stream); + auto objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); + auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream); + auto 
variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); + auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds(), stream); + auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds(), stream); + auto row_types = cuopt::host_copy(get_row_types(), stream); // Set constraint matrix in CSR format if (get_nnz() != 0) { @@ -652,7 +653,7 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ std::vector variable_types(get_n_variables()); // Set variable types (convert from enum to char) if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); + auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream); // Convert enum types to char types for (size_t i = 0; i < variable_types.size(); ++i) { @@ -677,13 +678,17 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ template void optimization_problem_t::print_scaling_information() const { - std::vector constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds()); - std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); - std::vector constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - std::vector constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + auto stream = handle_ptr_->get_stream(); + std::vector constraint_matrix_values = + cuopt::host_copy(get_constraint_matrix_values(), stream); + std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds(), stream); + std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); + std::vector variable_lower_bounds = 
cuopt::host_copy(get_variable_lower_bounds(), stream); + std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); + std::vector constraint_lower_bounds = + cuopt::host_copy(get_constraint_lower_bounds(), stream); + std::vector constraint_upper_bounds = + cuopt::host_copy(get_constraint_upper_bounds(), stream); auto findMaxAbs = [](const std::vector& vec) -> f_t { if (vec.empty()) { return 0.0; } diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 8453ac3e7..19f6c024c 100644 --- a/cpp/src/linear_programming/translate.hpp +++ b/cpp/src/linear_programming/translate.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -116,22 +116,23 @@ void translate_to_crossover_problem(const detail::problem_t& problem, { CUOPT_LOG_DEBUG("Starting translation"); - std::vector pdlp_objective = cuopt::host_copy(problem.objective_coefficients); + auto stream = problem.handle_ptr->get_stream(); + std::vector pdlp_objective = cuopt::host_copy(problem.objective_coefficients, stream); dual_simplex::csr_matrix_t csr_A( problem.n_constraints, problem.n_variables, problem.nnz); - csr_A.x = cuopt::host_copy(problem.coefficients); - csr_A.j = cuopt::host_copy(problem.variables); - csr_A.row_start = cuopt::host_copy(problem.offsets); + csr_A.x = cuopt::host_copy(problem.coefficients, stream); + csr_A.j = cuopt::host_copy(problem.variables, stream); + csr_A.row_start = cuopt::host_copy(problem.offsets, stream); - problem.handle_ptr->get_stream().synchronize(); + stream.synchronize(); CUOPT_LOG_DEBUG("Converting to compressed column"); csr_A.to_compressed_col(lp.A); CUOPT_LOG_DEBUG("Converted to compressed column"); std::vector 
slack(problem.n_constraints); - std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution()); - problem.handle_ptr->get_stream().synchronize(); + std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream); + stream.synchronize(); dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); CUOPT_LOG_DEBUG("Multiplied A and x"); @@ -161,8 +162,8 @@ void translate_to_crossover_problem(const detail::problem_t& problem, auto [lower, upper] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); - std::vector constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds); - std::vector constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds); + std::vector constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds, stream); + std::vector constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds, stream); lp.objective.resize(n, 0.0); std::copy( @@ -187,10 +188,10 @@ void translate_to_crossover_problem(const detail::problem_t& problem, if (initial_solution.x[j] > lp.upper[j]) { initial_solution.x[j] = lp.upper[j]; } } CUOPT_LOG_DEBUG("Finished with x"); - initial_solution.y = cuopt::host_copy(sol.get_dual_solution()); + initial_solution.y = cuopt::host_copy(sol.get_dual_solution(), stream); - std::vector tmp_z = cuopt::host_copy(sol.get_reduced_cost()); - problem.handle_ptr->get_stream().synchronize(); + std::vector tmp_z = cuopt::host_copy(sol.get_reduced_cost(), stream); + stream.synchronize(); std::copy(tmp_z.begin(), tmp_z.begin() + problem.n_variables, initial_solution.z.begin()); for (i_t j = problem.n_variables; j < n; ++j) { initial_solution.z[j] = initial_solution.y[j - problem.n_variables]; diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index b7e3a5331..ba648f30e 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
All rights + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights * reserved. SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -221,7 +221,7 @@ void rins_t::run_rins() &rins_handle, &fixed_problem, context.settings, context.scaling); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); - fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment)); + fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); std::vector default_weights(fixed_problem.n_constraints, 1.); cpu_fj_thread_t cpu_fj_thread; cpu_fj_thread.fj_cpu = fj.create_cpu_climber(fj_solution, diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh index 051d51483..94cc66399 100644 --- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -190,7 +190,7 @@ class bound_prop_recombiner_t : public recombiner_t { probing_values, n_vars_from_other, variable_map); - probing_config.probing_values = host_copy(probing_values); + probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream()); probing_config.n_of_fixed_from_first = fixed_from_guiding; probing_config.n_of_fixed_from_second = fixed_from_other; probing_config.use_balanced_probing = true; @@ -214,7 +214,7 @@ class bound_prop_recombiner_t : public recombiner_t { timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit); get_probing_values_for_infeasible( guiding_solution, other_solution, offspring, probing_values, n_vars_from_other); - probing_config.probing_values = host_copy(probing_values); + probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream()); constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config); } constraint_prop.max_n_failed_repair_iterations = 1; diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index a3353e72f..ce497adfa 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -110,7 +110,8 @@ void local_search_t::start_cpufj_lptopt_scratch_threads( std::vector default_weights(context.problem_ptr->n_constraints, 1.); solution_t solution_lp(*context.problem_ptr); - solution_lp.copy_new_assignment(host_copy(lp_optimal_solution)); + solution_lp.copy_new_assignment( + host_copy(lp_optimal_solution, context.problem_ptr->handle_ptr->get_stream())); solution_lp.round_random_nearest(500); scratch_cpu_fj_on_lp_opt.fj_cpu = fj.create_cpu_climber( solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_); diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu index 0d8fb6a08..6d0fb767d 100644 --- a/cpp/src/mip/presolve/conditional_bound_strengthening.cu +++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -230,11 +230,12 @@ void conditional_bound_strengthening_t::select_constraint_pairs_host( #ifdef DEBUG_COND_BOUNDS_PROP auto start_time = std::chrono::high_resolution_clock::now(); #endif - auto variables = cuopt::host_copy(problem.variables); - auto offsets = cuopt::host_copy(problem.offsets); + auto stream = problem.handle_ptr->get_stream(); + auto variables = cuopt::host_copy(problem.variables, stream); + auto offsets = cuopt::host_copy(problem.offsets, stream); - auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints); - auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets); + auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints, stream); + auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets, stream); std::vector constraint_pairs_h(max_pair_per_row * problem.n_constraints, {-1, -1}); std::unordered_set cnstr_pair; @@ -295,8 +296,8 @@ void conditional_bound_strengthening_t::select_constraint_pairs_device colsC, valsC); std::vector constraint_pairs_h; - offsets_h = cuopt::host_copy(offsetsC); - cols_h = cuopt::host_copy(colsC); + offsets_h = cuopt::host_copy(offsetsC, stream); + cols_h = cuopt::host_copy(colsC, stream); constraint_pairs_h.reserve(max_pair_per_row * problem.n_constraints); for (int i = 0; i < problem.n_constraints; ++i) { @@ -654,8 +655,9 @@ void conditional_bound_strengthening_t::solve(problem_t& pro raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size; #ifdef DEBUG_COND_BOUNDS_PROP - auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds); - auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds); + auto debug_stream = problem.handle_ptr->get_stream(); + auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream); + auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream); auto start_time = 
std::chrono::high_resolution_clock::now(); #endif @@ -674,8 +676,8 @@ void conditional_bound_strengthening_t::solve(problem_t& pro double time_for_presolve = std::chrono::duration_cast(end_time - start_time).count(); - auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds); - auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds); + auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream); + auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream); int num_improvements = 0; int num_new_equality = 0; diff --git a/cpp/src/mip/presolve/lb_probing_cache.cu b/cpp/src/mip/presolve/lb_probing_cache.cu index 4a03a86fd..790ed32e4 100644 --- a/cpp/src/mip/presolve/lb_probing_cache.cu +++ b/cpp/src/mip/presolve/lb_probing_cache.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -302,7 +302,7 @@ inline std::vector compute_prioritized_integer_indices( } return false; }); - auto h_priority_indices = host_copy(priority_indices); + auto h_priority_indices = host_copy(priority_indices, problem.pb->handle_ptr->get_stream()); return h_priority_indices; } @@ -315,9 +315,10 @@ void compute_probing_cache(load_balanced_bounds_presolve_t& bound_pres auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); // std::cout<<"priority_indices\n"; CUOPT_LOG_DEBUG("Computing probing cache"); - auto h_integer_indices = host_copy(problem.pb->integer_indices); - auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds); - auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds); + auto stream = problem.pb->handle_ptr->get_stream(); + auto h_integer_indices = host_copy(problem.pb->integer_indices, stream); + auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds, stream); + auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds, stream); size_t n_of_cached_probings = 0; // TODO adjust the iteration limit depending on the total time limit and time it takes for single // var diff --git a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh index 03ff6b2c0..0ace09afb 100644 --- a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -181,7 +181,7 @@ class log_dist_t { log_dist_t() = default; log_dist_t(rmm::device_uvector& vertex_id, rmm::device_uvector& bin_offsets) - : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets)) + : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets, bin_offsets.stream())) { // If bin_offsets_ is smaller than NumberBins then resize it // so that the last element is repeated diff --git a/cpp/src/mip/presolve/probing_cache.cu b/cpp/src/mip/presolve/probing_cache.cu index 18620dc51..e191cdde9 100644 --- a/cpp/src/mip/presolve/probing_cache.cu +++ b/cpp/src/mip/presolve/probing_cache.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -346,7 +346,7 @@ inline std::vector compute_prioritized_integer_indices( } return false; }); - auto h_priority_indices = host_copy(priority_indices); + auto h_priority_indices = host_copy(priority_indices, problem.handle_ptr->get_stream()); problem.handle_ptr->sync_stream(); return h_priority_indices; } @@ -461,8 +461,9 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, // we dont want to compute the probing cache for all variables for time and computation resources auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); CUOPT_LOG_DEBUG("Computing probing cache"); - auto h_integer_indices = host_copy(problem.integer_indices); - const auto h_var_bounds = host_copy(problem.variable_bounds); + auto stream = problem.handle_ptr->get_stream(); + auto h_integer_indices = host_copy(problem.integer_indices, stream); + const auto h_var_bounds = host_copy(problem.variable_bounds, stream); // TODO adjust the iteration limit depending on 
the total time limit and time it takes for single // var bound_presolve.settings.iteration_limit = 50; diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh index cf9659662..c2a3927ea 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cuh +++ b/cpp/src/mip/presolve/trivial_presolve.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -313,12 +313,13 @@ void update_from_csr(problem_t& pb) template void test_reverse_matches(const problem_t& pb) { - auto h_offsets = cuopt::host_copy(pb.offsets); - auto h_coefficients = cuopt::host_copy(pb.coefficients); - auto h_variables = cuopt::host_copy(pb.variables); - auto h_reverse_offsets = cuopt::host_copy(pb.reverse_offsets); - auto h_reverse_constraints = cuopt::host_copy(pb.reverse_constraints); - auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients); + auto stream = pb.handle_ptr->get_stream(); + auto h_offsets = cuopt::host_copy(pb.offsets, stream); + auto h_coefficients = cuopt::host_copy(pb.coefficients, stream); + auto h_variables = cuopt::host_copy(pb.variables, stream); + auto h_reverse_offsets = cuopt::host_copy(pb.reverse_offsets, stream); + auto h_reverse_constraints = cuopt::host_copy(pb.reverse_constraints, stream); + auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients, stream); std::vector> vars_per_constr(pb.n_constraints); std::vector> coeff_per_constr(pb.n_constraints); diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 5be50fb05..815ef5aa0 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1344,7 +1344,7 @@ problem_t problem_t::get_problem_after_fixing_vars( // do an assignment from the original_ids of the current problem problem.original_ids.resize(variable_map.size()); std::fill(problem.reverse_original_ids.begin(), problem.reverse_original_ids.end(), -1); - auto h_variable_map = cuopt::host_copy(variable_map); + auto h_variable_map = cuopt::host_copy(variable_map, handle_ptr->get_stream()); for (size_t i = 0; i < variable_map.size(); ++i) { cuopt_assert(h_variable_map[i] < original_ids.size(), "Variable index out of bounds"); problem.original_ids[i] = original_ids[h_variable_map[i]]; @@ -1522,9 +1522,10 @@ std::vector>> compute_var_to_constraint_map( { raft::common::nvtx::range fun_scope("compute_var_to_constraint_map"); std::vector>> variable_constraint_map(pb.n_variables); - auto h_variables = cuopt::host_copy(pb.variables); - auto h_coefficients = cuopt::host_copy(pb.coefficients); - auto h_offsets = cuopt::host_copy(pb.offsets); + auto stream = pb.handle_ptr->get_stream(); + auto h_variables = cuopt::host_copy(pb.variables, stream); + auto h_coefficients = cuopt::host_copy(pb.coefficients, stream); + auto h_offsets = cuopt::host_copy(pb.offsets, stream); for (i_t cnst = 0; cnst < pb.n_constraints; ++cnst) { for (i_t i = h_offsets[cnst]; i < h_offsets[cnst + 1]; ++i) { i_t var = h_variables[i]; @@ -1542,10 +1543,11 @@ void standardize_bounds(std::vector>>& variable_ { raft::common::nvtx::range fun_scope("standardize_bounds"); auto handle_ptr = pb.handle_ptr; - auto h_var_bounds = cuopt::host_copy(pb.variable_bounds); - auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients); - auto h_variable_types = cuopt::host_copy(pb.variable_types); - auto h_var_flags = cuopt::host_copy(pb.presolve_data.var_flags); + 
auto stream = handle_ptr->get_stream(); + auto h_var_bounds = cuopt::host_copy(pb.variable_bounds, stream); + auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients, stream); + auto h_variable_types = cuopt::host_copy(pb.variable_types, stream); + auto h_var_flags = cuopt::host_copy(pb.presolve_data.var_flags, stream); handle_ptr->sync_stream(); const i_t n_vars_originally = (i_t)h_var_bounds.size(); @@ -1687,12 +1689,13 @@ void problem_t::get_host_user_problem( i_t nz = nnz; user_problem.num_rows = m; user_problem.num_cols = n; - user_problem.objective = cuopt::host_copy(objective_coefficients); + auto stream = handle_ptr->get_stream(); + user_problem.objective = cuopt::host_copy(objective_coefficients, stream); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = cuopt::host_copy(coefficients); - csr_A.j = cuopt::host_copy(variables); - csr_A.row_start = cuopt::host_copy(offsets); + csr_A.x = cuopt::host_copy(coefficients, stream); + csr_A.j = cuopt::host_copy(variables, stream); + csr_A.row_start = cuopt::host_copy(offsets, stream); csr_A.to_compressed_col(user_problem.A); @@ -1701,8 +1704,8 @@ void problem_t::get_host_user_problem( user_problem.range_rows.clear(); user_problem.range_value.clear(); - auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds); - auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds); + auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds, stream); + auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds, stream); // All constraints have lower and upper bounds // lr <= a_i^T x <= ur @@ -1763,7 +1766,7 @@ void problem_t::get_host_user_problem( user_problem.obj_scale = presolve_data.objective_scaling_factor; user_problem.var_types.resize(n); - auto model_variable_types = cuopt::host_copy(variable_types); + auto model_variable_types = cuopt::host_copy(variable_types, stream); for (int j = 0; j < n; ++j) { 
user_problem.var_types[j] = model_variable_types[j] == var_t::CONTINUOUS @@ -1781,7 +1784,8 @@ template void problem_t::compute_vars_with_objective_coeffs() { raft::common::nvtx::range fun_scope("compute_vars_with_objective_coeffs"); - auto h_objective_coefficients = cuopt::host_copy(objective_coefficients); + auto h_objective_coefficients = + cuopt::host_copy(objective_coefficients, handle_ptr->get_stream()); std::vector vars_with_objective_coeffs_; std::vector objective_coeffs_; for (i_t i = 0; i < n_variables; ++i) { diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 36bef4142..9e9a2d75f 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -229,11 +229,12 @@ void solution_t::assign_random_within_bounds(f_t ratio_of_vars_to_rand bool only_integers) { std::mt19937 rng(cuopt::seed_generator::get_seed()); - std::vector h_assignment = host_copy(assignment); + auto stream = handle_ptr->get_stream(); + std::vector h_assignment = host_copy(assignment, stream); std::uniform_real_distribution unif_prob(0, 1); - auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds); - auto variable_types = cuopt::host_copy(problem_ptr->variable_types); + auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds, stream); + auto variable_types = cuopt::host_copy(problem_ptr->variable_types, stream); problem_ptr->handle_ptr->sync_stream(); for (size_t i = 0; i < problem_ptr->variable_bounds.size(); ++i) { if (only_integers && variable_types[i] != var_t::INTEGER) { continue; } diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index e5161882e..e6a392d40 100644 --- 
a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -259,7 +259,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, std::numeric_limits::signaling_NaN()); detail::problem_t full_problem(op_problem); detail::solution_t full_sol(full_problem); - full_sol.copy_new_assignment(cuopt::host_copy(primal_solution)); + full_sol.copy_new_assignment( + cuopt::host_copy(primal_solution, op_problem.get_handle_ptr()->get_stream())); full_sol.compute_feasibility(); if (!full_sol.get_feasible()) { CUOPT_LOG_WARN("The solution is not feasible after post solve"); diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 0da4c6398..7311a26fd 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -136,7 +136,8 @@ solution_t mip_solver_t::run_solver() auto opt_sol = solve_lp_with_method(*context.problem_ptr, settings, lp_timer); solution_t sol(*context.problem_ptr); - sol.copy_new_assignment(host_copy(opt_sol.get_primal_solution())); + sol.copy_new_assignment( + host_copy(opt_sol.get_primal_solution(), context.problem_ptr->handle_ptr->get_stream())); if (opt_sol.get_termination_status() == pdlp_termination_status_t::Optimal || opt_sol.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible || opt_sol.get_termination_status() == pdlp_termination_status_t::DualInfeasible) { diff --git a/cpp/src/routing/adapters/adapted_sol.cuh b/cpp/src/routing/adapters/adapted_sol.cuh index 48ebbdf2f..463f876ff 100644 --- a/cpp/src/routing/adapters/adapted_sol.cuh +++ b/cpp/src/routing/adapters/adapted_sol.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -449,8 +449,9 @@ struct adapted_sol_t { void populate_unserviced_nodes() { raft::common::nvtx::range fun_scope("populate_unserviced_nodes"); - has_unserviced_nodes = false; - auto h_route_id_per_node = host_copy(sol.route_node_map.route_id_per_node); + has_unserviced_nodes = false; + auto h_route_id_per_node = + host_copy(sol.route_node_map.route_id_per_node, sol.sol_handle->get_stream()); for (size_t i = 0; i < h_route_id_per_node.size(); ++i) { if (h_route_id_per_node[i] == -1) { pred[i] = NodeInfo<>(); @@ -487,12 +488,13 @@ struct adapted_sol_t { skip_route_copy = false; } std::vector h_routes_to_copy; - if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy); + if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy, sol.sol_handle->get_stream()); for (i_t i = 0; i < sol.n_routes && !skip_route_copy; ++i) { if (!copy_all && h_routes_to_copy[i] == 0) continue; - auto& curr_route = sol.get_route(i); - auto node_infos_temp = host_copy(curr_route.dimensions.requests.node_info); - i_t n_nodes = curr_route.n_nodes.value(sol.sol_handle->get_stream()); + auto& curr_route = sol.get_route(i); + auto node_infos_temp = + host_copy(curr_route.dimensions.requests.node_info, sol.sol_handle->get_stream()); + i_t n_nodes = curr_route.n_nodes.value(sol.sol_handle->get_stream()); // Remove break nodes for diversity std::vector> node_infos; diff --git a/cpp/src/routing/adapters/assignment_adapter.cuh b/cpp/src/routing/adapters/assignment_adapter.cuh index e7e945eb9..c41c3e161 100644 --- a/cpp/src/routing/adapters/assignment_adapter.cuh +++ b/cpp/src/routing/adapters/assignment_adapter.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -84,18 +84,18 @@ assignment_t ges_solver_t::get_ges_assignment( auto route_id = route.route_id.value(stream); auto vehicle_id = route.vehicle_id.value(stream); - auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info); + auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info, stream); std::vector departure_forward_h(node_infos_h.size(), 0.); std::vector actual_arrival_h(node_infos_h.size(), 0.); std::vector earliest_arrival_backward_h(node_infos_h.size(), 0.); std::vector latest_arrival_forward_h(node_infos_h.size(), 0.); if (problem.dimensions_info.has_dimension(detail::dim_t::TIME)) { - departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward); - actual_arrival_h = cuopt::host_copy(route.dimensions.time_dim.actual_arrival); + departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward, stream); + actual_arrival_h = cuopt::host_copy(route.dimensions.time_dim.actual_arrival, stream); earliest_arrival_backward_h = - cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward); + cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward, stream); latest_arrival_forward_h = - cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward); + cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward, stream); } i_t drop_return_trip = sol.problem_ptr->drop_return_trip_h[vehicle_id]; diff --git a/cpp/src/routing/adapters/solution_adapter.cuh b/cpp/src/routing/adapters/solution_adapter.cuh index ba292a688..5571f4b3b 100644 --- a/cpp/src/routing/adapters/solution_adapter.cuh +++ b/cpp/src/routing/adapters/solution_adapter.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,10 +25,11 @@ void fill_routes_data(solution_t& sol, const problem_t& problem) { const auto n_routes = assignment.get_vehicle_count(); - auto h_route = cuopt::host_copy(assignment.get_route()); - auto h_truck_ids = cuopt::host_copy(assignment.get_truck_id()); - auto h_route_locations = cuopt::host_copy(assignment.get_order_locations()); - auto h_node_types = cuopt::host_copy(assignment.get_node_types()); + auto stream = sol.sol_handle->get_stream(); + auto h_route = cuopt::host_copy(assignment.get_route(), stream); + auto h_truck_ids = cuopt::host_copy(assignment.get_truck_id(), stream); + auto h_route_locations = cuopt::host_copy(assignment.get_order_locations(), stream); + auto h_node_types = cuopt::host_copy(assignment.get_node_types(), stream); sol.sol_handle->sync_stream(); assignment.get_truck_id().stream().synchronize(); diff --git a/cpp/src/routing/assignment.cu b/cpp/src/routing/assignment.cu index 6bf468502..4636fa735 100644 --- a/cpp/src/routing/assignment.cu +++ b/cpp/src/routing/assignment.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -248,13 +248,14 @@ void assignment_t::print(std::ostream& os) const noexcept template host_assignment_t::host_assignment_t(const assignment_t& routing_solution) { - route = cuopt::host_copy(routing_solution.get_route()); - truck_id = cuopt::host_copy(routing_solution.get_truck_id()); - stamp = cuopt::host_copy(routing_solution.get_arrival_stamp()); - locations = cuopt::host_copy(routing_solution.get_order_locations()); - node_types = cuopt::host_copy(routing_solution.get_node_types()); - unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes()); - accepted = cuopt::host_copy(routing_solution.get_accepted()); + auto stream = routing_solution.get_route().stream(); + route = cuopt::host_copy(routing_solution.get_route(), stream); + truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); + stamp = cuopt::host_copy(routing_solution.get_arrival_stamp(), stream); + locations = cuopt::host_copy(routing_solution.get_order_locations(), stream); + node_types = cuopt::host_copy(routing_solution.get_node_types(), stream); + unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes(), stream); + accepted = cuopt::host_copy(routing_solution.get_accepted(), stream); } template diff --git a/cpp/src/routing/crossovers/ox_graph.hpp b/cpp/src/routing/crossovers/ox_graph.hpp index 6f65b0843..55d2e39a0 100644 --- a/cpp/src/routing/crossovers/ox_graph.hpp +++ b/cpp/src/routing/crossovers/ox_graph.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -39,14 +39,14 @@ struct ox_graph_t { std::vector buckets; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.row_sizes = host_copy(row_sizes); - h.route_ids = host_copy(route_ids); - h.indices = host_copy(indices); - h.weights = host_copy(weights); - h.buckets = host_copy(buckets); + h.row_sizes = host_copy(row_sizes, stream); + h.route_ids = host_copy(route_ids, stream); + h.indices = host_copy(indices, stream); + h.weights = host_copy(weights, stream); + h.buckets = host_copy(buckets, stream); return h; } diff --git a/cpp/src/routing/crossovers/ox_recombiner.cuh b/cpp/src/routing/crossovers/ox_recombiner.cuh index 17823c28b..681943d3c 100644 --- a/cpp/src/routing/crossovers/ox_recombiner.cuh +++ b/cpp/src/routing/crossovers/ox_recombiner.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -433,7 +433,7 @@ struct OX { h_graph[i].reserve(problem_size); } - adj_to_host(h_graph); + adj_to_host(h_graph, A.sol.sol_handle->get_stream()); std::vector> h_path_cost(problem_size + 1); // Vector of parents to recreate optimal path @@ -500,7 +500,7 @@ struct OX { } } - void test_transpose_graph() + void test_transpose_graph(rmm::cuda_stream_view stream) { std::vector>> h_transpose_graph(offspring.size()); for (size_t i = 0; i < h_transpose_graph.size(); ++i) { @@ -512,7 +512,7 @@ struct OX { tmp_graph[i].reserve(problem_size); } - adj_to_host(tmp_graph); + adj_to_host(tmp_graph, stream); for (size_t i = 0; i < tmp_graph.size(); ++i) { for (size_t j = 0; j < tmp_graph[i].size(); ++j) { @@ -521,7 +521,7 @@ struct OX { } } - auto tmp_transpose = transpose_graph.to_host(); + auto tmp_transpose = transpose_graph.to_host(stream); for (size_t i = 0; i < h_transpose_graph.size(); ++i) { auto transpose_offset = @@ -618,7 +618,7 @@ struct OX { raft::common::nvtx::range fun_scope("bellman_ford"); compute_transpose_graph(A); - cuopt_func_call(test_transpose_graph()); + cuopt_func_call(test_transpose_graph(A.sol.sol_handle->get_stream())); auto row_size = offspring.size(); d_path_cost.resize((problem_size + 1) * row_size, A.sol.sol_handle->get_stream()); @@ -789,9 +789,10 @@ struct OX { offspring[0] = 0; } - void adj_to_host(std::vector>>& h_graph) + void adj_to_host(std::vector>>& h_graph, + rmm::cuda_stream_view stream) { - auto tmp_graph = d_graph.to_host(); + auto tmp_graph = d_graph.to_host(stream); for (int veh = 0; veh < n_buckets; ++veh) { for (size_t i = 0; i < d_graph.get_num_vertices(); ++i) { auto row_size = tmp_graph.row_sizes[veh * d_graph.get_num_vertices() + i]; @@ -818,7 +819,7 @@ struct OX { for (size_t i = 0; i < h_graph.size(); ++i) { h_graph[i].reserve(max_route_len); } - adj_to_host(h_graph); + adj_to_host(h_graph, A.sol.sol_handle->get_stream()); const auto& dimensions_info = 
A.problem->dimensions_info; diff --git a/cpp/src/routing/cuda_graph.cuh b/cpp/src/routing/cuda_graph.cuh index aa7d890d1..1fb2425d2 100644 --- a/cpp/src/routing/cuda_graph.cuh +++ b/cpp/src/routing/cuda_graph.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -20,7 +20,9 @@ namespace detail { struct cuda_graph_t { void start_capture(rmm::cuda_stream_view stream) { - cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); + // Use ThreadLocal mode to allow multi-threaded batch execution + // Global mode blocks other streams from performing operations during capture + cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal); capture_started = true; } diff --git a/cpp/src/routing/fleet_info.hpp b/cpp/src/routing/fleet_info.hpp index 929a26959..1a37c6655 100644 --- a/cpp/src/routing/fleet_info.hpp +++ b/cpp/src/routing/fleet_info.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -70,32 +70,29 @@ class fleet_info_t { v_buckets_.resize(size, stream); } - auto to_host() + auto to_host(rmm::cuda_stream_view stream) { host_t h; - h.break_offset = host_copy(v_break_offset_); - h.break_durations = host_copy(v_break_duration_); - h.break_earliest = host_copy(v_break_earliest_); - h.break_latest = host_copy(v_break_latest_); - h.earliest_time = host_copy(v_earliest_time_); - h.latest_time = host_copy(v_latest_time_); - h.start_locations = host_copy(v_start_locations_); - h.return_locations = host_copy(v_return_locations_); - h.drop_return_trip = host_copy(v_drop_return_trip_); - h.skip_first_trip = host_copy(v_skip_first_trip_); - h.capacities = host_copy(v_capacities_); - h.max_costs = host_copy(v_max_costs_); - h.max_times = host_copy(v_max_times_); - h.fixed_costs = host_copy(v_fixed_costs_); - h.fleet_order_constraints = fleet_order_constraints_.to_host(); - h.types = host_copy(v_types_); - h.buckets = host_copy(v_buckets_); + h.break_offset = host_copy(v_break_offset_, stream); + h.break_durations = host_copy(v_break_duration_, stream); + h.break_earliest = host_copy(v_break_earliest_, stream); + h.break_latest = host_copy(v_break_latest_, stream); + h.earliest_time = host_copy(v_earliest_time_, stream); + h.latest_time = host_copy(v_latest_time_, stream); + h.start_locations = host_copy(v_start_locations_, stream); + h.return_locations = host_copy(v_return_locations_, stream); + h.drop_return_trip = host_copy(v_drop_return_trip_, stream); + h.skip_first_trip = host_copy(v_skip_first_trip_, stream); + h.capacities = host_copy(v_capacities_, stream); + h.max_costs = host_copy(v_max_costs_, stream); + h.max_times = host_copy(v_max_times_, stream); + h.fixed_costs = host_copy(v_fixed_costs_, stream); + h.fleet_order_constraints = fleet_order_constraints_.to_host(stream); + h.types = host_copy(v_types_, stream); + h.buckets = host_copy(v_buckets_, stream); h.matrices = 
detail::create_host_mdarray( matrices_.extent[2], matrices_.extent[0], matrices_.extent[1]); - raft::copy(h.matrices.buffer.data(), - matrices_.buffer.data(), - matrices_.buffer.size(), - matrices_.buffer.stream()); + raft::copy(h.matrices.buffer.data(), matrices_.buffer.data(), matrices_.buffer.size(), stream); return h; } diff --git a/cpp/src/routing/fleet_order_constraints.hpp b/cpp/src/routing/fleet_order_constraints.hpp index fdd8f8fd8..c6be63a87 100644 --- a/cpp/src/routing/fleet_order_constraints.hpp +++ b/cpp/src/routing/fleet_order_constraints.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -67,11 +67,11 @@ struct fleet_order_constraints_t { i_t n_vehicles; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.order_service_times = host_copy(order_service_times); - auto tmp_order_match = host_copy(order_match); + h.order_service_times = host_copy(order_service_times, stream); + auto tmp_order_match = host_copy(order_match, stream); h.order_match = thrust::host_vector(tmp_order_match); h.n_orders = n_orders; h.n_vehicles = n_vehicles; diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu index 37e5905dd..194f73b99 100644 --- a/cpp/src/routing/ges_solver.cu +++ b/cpp/src/routing/ges_solver.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -30,7 +30,10 @@ ges_solver_t::ges_solver_t(const data_model_view_t& : timer(time_limit_), problem(data_model, solver_settings), // override for now - pool_allocator(problem, max_sol_per_population, expected_route_count_), + pool_allocator(problem, + max_sol_per_population, + data_model.get_handle_ptr()->get_stream(), + expected_route_count_), expected_route_count(expected_route_count_), intermediate_file(intermediate_file_) { diff --git a/cpp/src/routing/local_search/cycle_finder/cycle.hpp b/cpp/src/routing/local_search/cycle_finder/cycle.hpp index b38cb72c6..7e3e275e1 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle.hpp +++ b/cpp/src/routing/local_search/cycle_finder/cycle.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,11 +45,11 @@ struct ret_cycles_t { i_t n_cycles; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.paths = host_copy(paths); - h.offsets = host_copy(offsets); + h.paths = host_copy(paths, stream); + h.offsets = host_copy(offsets, stream); h.n_cycles = size(); return h; } diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu index a8a5d8aaf..65d654b06 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu +++ b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -213,8 +213,9 @@ template bool ExactCycleFinder::check_cycle(graph_t& graph, ret_cycles_t& ret) { - auto h_graph = graph.to_host(); - auto h_cycles = ret.to_host(); + auto stream = handle_ptr->get_stream(); + auto h_graph = graph.to_host(stream); + auto h_cycles = ret.to_host(stream); bool cost_matches = true; std::unordered_set changed_route_ids; for (i_t cycle = 0; cycle < h_cycles.n_cycles; ++cycle) { diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp index a08f5f1a3..3c28f78bc 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp +++ b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,13 +45,13 @@ struct graph_t { std::vector weights; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.row_sizes = host_copy(row_sizes); - h.route_ids = host_copy(route_ids); - h.indices = host_copy(indices); - h.weights = host_copy(weights); + h.row_sizes = host_copy(row_sizes, stream); + h.route_ids = host_copy(route_ids, stream); + h.indices = host_copy(indices, stream); + h.weights = host_copy(weights, stream); return h; } diff --git a/cpp/src/routing/order_info.hpp b/cpp/src/routing/order_info.hpp index 6f0fb1ecc..d20c46a8e 100644 --- a/cpp/src/routing/order_info.hpp +++ b/cpp/src/routing/order_info.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -57,14 +57,14 @@ class order_info_t { bool is_pdp() const { return !v_pair_indices_.is_empty(); } - auto to_host() + auto to_host(rmm::cuda_stream_view stream) { host_t h; - h.earliest_time = cuopt::host_copy(v_earliest_time_); - h.latest_time = cuopt::host_copy(v_latest_time_); - h.demand = cuopt::host_copy(v_demand_); - h.prizes = cuopt::host_copy(v_prizes_); - h.order_locations = cuopt::host_copy(v_order_locations_); + h.earliest_time = cuopt::host_copy(v_earliest_time_, stream); + h.latest_time = cuopt::host_copy(v_latest_time_, stream); + h.demand = cuopt::host_copy(v_demand_, stream); + h.prizes = cuopt::host_copy(v_prizes_, stream); + h.order_locations = cuopt::host_copy(v_order_locations_, stream); h.depot_included = depot_included_; return h; } diff --git a/cpp/src/routing/problem/problem.cu b/cpp/src/routing/problem/problem.cu index 1df7077fb..4335b9373 100644 --- a/cpp/src/routing/problem/problem.cu +++ b/cpp/src/routing/problem/problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -55,7 +55,7 @@ problem_t::problem_t(const data_model_view_t& data_model_vie pair_indices_h.size(), handle_ptr->get_stream()); - vehicle_types_h = cuopt::host_copy(fleet_info.v_types_); + vehicle_types_h = cuopt::host_copy(fleet_info.v_types_, handle_ptr->get_stream()); for (auto& vtype : vehicle_types_h) { if (!distance_matrices_h.count(vtype)) { auto cost_matrix = fleet_info.matrices_.get_cost_matrix(vtype); @@ -104,7 +104,7 @@ void problem_t::populate_vehicle_buckets() { auto fleet_size = data_view_ptr->get_fleet_size(); vehicle_buckets_h.resize(fleet_size); - fleet_info_h = fleet_info.to_host(); + fleet_info_h = fleet_info.to_host(handle_ptr->get_stream()); // infer vehicle types from data model for (int vehicle_id = 0; vehicle_id < fleet_size; ++vehicle_id) { @@ -375,7 +375,7 @@ void problem_t::populate_host_arrays() auto pickup_indices = data_view_ptr->get_pickup_delivery_pair().first; auto stream = data_view_ptr->get_handle_ptr()->get_stream(); - order_locations_h = cuopt::host_copy(order_info.v_order_locations_); + order_locations_h = cuopt::host_copy(order_info.v_order_locations_, stream); // Temporarily fill is_pickup_h for diversity, should use NodeInfo instead bool is_pdp = pickup_indices != nullptr; std::vector h_pickup_indices(get_num_requests()); @@ -387,18 +387,20 @@ void problem_t::populate_host_arrays() } } - drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_); - skip_first_trip_h = cuopt::host_copy(fleet_info.v_skip_first_trip_); - order_info_h = order_info.to_host(); + drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_, stream); + skip_first_trip_h = cuopt::host_copy(fleet_info.v_skip_first_trip_, stream); + order_info_h = order_info.to_host(stream); handle_ptr->sync_stream(); } template void problem_t::initialize_depot_info() { - int nvehicles = fleet_info.v_start_locations_.size(); - auto vehicle_start_locations = 
cuopt::host_copy(fleet_info.v_start_locations_); - auto vehicle_return_locations = cuopt::host_copy(fleet_info.v_return_locations_); + int nvehicles = fleet_info.v_start_locations_.size(); + auto vehicle_start_locations = + cuopt::host_copy(fleet_info.v_start_locations_, handle_ptr->get_stream()); + auto vehicle_return_locations = + cuopt::host_copy(fleet_info.v_return_locations_, handle_ptr->get_stream()); start_depot_node_infos_h.resize(nvehicles); return_depot_node_infos_h.resize(nvehicles); @@ -518,8 +520,8 @@ void problem_t::populate_special_nodes() int n_vehicles = get_fleet_size(); - auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_); - auto vehicle_latest_h = cuopt::host_copy(fleet_info.v_latest_time_); + auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_, handle_ptr->get_stream()); + auto vehicle_latest_h = cuopt::host_copy(fleet_info.v_latest_time_, handle_ptr->get_stream()); std::map> break_earliest_h, break_latest_h, break_duration_h; std::vector break_offset_h(n_vehicles + 1, 0), break_nodes_offset_h; diff --git a/cpp/src/routing/solution/pool_allocator.cuh b/cpp/src/routing/solution/pool_allocator.cuh index 89049a698..d78df6951 100644 --- a/cpp/src/routing/solution/pool_allocator.cuh +++ b/cpp/src/routing/solution/pool_allocator.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -42,8 +42,11 @@ class routing_resource_t { template class pool_allocator_t { public: - pool_allocator_t(const Problem& problem_, i_t n_solutions_, i_t desired_n_routes = -1) - : stream_pool(n_solutions_), problem(problem_) + pool_allocator_t(const Problem& problem_, + i_t n_solutions_, + rmm::cuda_stream_view stream_, + i_t desired_n_routes = -1) + : problem(problem_), stream(stream_) { raft::common::nvtx::range fun_scope("pool_allocator_t"); // FIXME:: This is temporary, we should let the diversity manager decide this @@ -54,8 +57,7 @@ class pool_allocator_t { } sol_handles.reserve(n_solutions_); for (i_t i = 0; i < n_solutions_; ++i) { - sol_handles.emplace_back( - std::make_unique>(stream_pool.get_stream(i))); + sol_handles.emplace_back(std::make_unique>(stream)); } Solution dummy_sol{problem_, 0, sol_handles[0].get()}; resource_pool = @@ -68,22 +70,10 @@ class pool_allocator_t { } } - void sync_all_streams() const - { - for (size_t i = 0; i < stream_pool.get_pool_size(); ++i) { - stream_pool.get_stream(i).synchronize(); - } - } - - // a stream pool that will be used to execute different solutions on - // we are currently not using raft handles stream pool as it is constructed in python layer - // TODO: later consider using raft stream pool and construct it on python layer - // however that pushes some internal logic to the higher levels which we want to avoid - // rmm::cuda_stream_pool is non-movable as it contains an atomic variables - // KEEP THIS MEMBER ABOVE OTHER MEMBERS, so that it is destructed the last - rmm::cuda_stream_pool stream_pool; + void sync_all_streams() const { stream.synchronize(); } // problem description + rmm::cuda_stream_view stream; const Problem& problem; std::vector>> sol_handles; // keep a thread safe pool of local search and ges objects that can be reused diff --git a/cpp/src/routing/solution/solution.cu b/cpp/src/routing/solution/solution.cu index 
eeed02568..edd3bef9a 100644 --- a/cpp/src/routing/solution/solution.cu +++ b/cpp/src/routing/solution/solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -735,7 +735,7 @@ std::vector solution_t::get_unserviced_nodes() const std::vector unserviced_nodes; unserviced_nodes.reserve(get_num_orders()); const bool depot_included = problem_ptr->order_info.depot_included_; - auto h_route_id_per_node = host_copy(route_node_map.route_id_per_node); + auto h_route_id_per_node = host_copy(route_node_map.route_id_per_node, sol_handle->get_stream()); for (size_t i = 0; i < h_route_id_per_node.size(); ++i) { if (h_route_id_per_node[i] == -1) { if (i > 0 || !depot_included) { unserviced_nodes.push_back(i); } diff --git a/cpp/src/routing/utilities/cython.cu b/cpp/src/routing/utilities/cython.cu index 74b3776f4..d727ab5b7 100644 --- a/cpp/src/routing/utilities/cython.cu +++ b/cpp/src/routing/utilities/cython.cu @@ -1,16 +1,20 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #include #include +#include #include #include #include +#include +#include + namespace cuopt { namespace cython { @@ -86,6 +90,68 @@ std::unique_ptr call_solve( return std::make_unique(std::move(vr_ret)); } +/** + * @brief Wrapper for batch vehicle_routing to expose the API to cython + * + * @param data_models Vector of data model pointers + * @param settings Composable solver settings object + * @return std::vector> + */ +std::vector> call_batch_solve( + std::vector*> data_models, + routing::solver_settings_t* settings) +{ + const std::size_t size = data_models.size(); + std::vector> list(size); + + // Use OpenMP for parallel execution + const int max_thread = std::min(static_cast(size), omp_get_max_threads()); + rmm::cuda_stream_pool stream_pool(size, rmm::cuda_stream::flags::non_blocking); + +#pragma omp parallel for num_threads(max_thread) + for (std::size_t i = 0; i < size; ++i) { + auto old_stream = data_models[i]->get_handle_ptr()->get_stream(); + // Make sure previous operations are finished + data_models[i]->get_handle_ptr()->sync_stream(); + + // Set new non blocking stream for current data model + raft::resource::set_cuda_stream(*(data_models[i]->get_handle_ptr()), stream_pool.get_stream(i)); + auto routing_solution = cuopt::routing::solve(*data_models[i], *settings); + + // Make sure current solve is finished + stream_pool.get_stream(i).synchronize(); + + // Create buffers and reassociate them with the original stream so they + // outlive the local stream which will be destroyed at end of loop iteration + auto make_buffer = [old_stream = old_stream](rmm::device_buffer&& buf) { + buf.set_stream(old_stream); + return std::make_unique(std::move(buf)); + }; + + vehicle_routing_ret_t vr_ret{routing_solution.get_vehicle_count(), + routing_solution.get_total_objective(), + routing_solution.get_objectives(), + make_buffer(routing_solution.get_route().release()), + 
make_buffer(routing_solution.get_order_locations().release()), + make_buffer(routing_solution.get_arrival_stamp().release()), + make_buffer(routing_solution.get_truck_id().release()), + make_buffer(routing_solution.get_node_types().release()), + make_buffer(routing_solution.get_unserviced_nodes().release()), + make_buffer(routing_solution.get_accepted().release()), + routing_solution.get_status(), + routing_solution.get_status_string(), + routing_solution.get_error_status().get_error_type(), + routing_solution.get_error_status().what()}; + list[i] = std::make_unique(std::move(vr_ret)); + + // Restore the old stream + raft::resource::set_cuda_stream(*(data_models[i]->get_handle_ptr()), old_stream); + old_stream.synchronize(); + } + + return list; +} + /** * @brief Wrapper for dataset_t to expose the API to cython. * @param solver Composable solver object diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 24158816a..2b1890728 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -150,36 +150,6 @@ inline auto host_copy(bool const* device_ptr, size_t size, rmm::cuda_stream_view return h_bool_vec; } -/** - * @brief Simple utility function to copy device_uvector to host - * - * @tparam T - * @param device_vec - * @param stream_view - * @return auto - */ -template -auto host_copy(rmm::device_uvector const& device_vec) -{ - return host_copy(device_vec.data(), device_vec.size(), device_vec.stream()); -} - -/** - * @brief Simple utility function to copy device_uvector to host - * - * @tparam T - * @param device_vec - * @return auto - */ -template -auto host_copy(rmm::device_uvector const& device_vec) -{ - std::vector host_vec(device_vec.size()); - raft::copy(host_vec.data(), device_vec.data(), device_vec.size(), device_vec.stream()); - device_vec.stream().synchronize(); - return host_vec; -} - /** * @brief Simple utility function to copy device_uvector to host * @@ -369,8 +339,9 @@ template std::tuple, std::vector> extract_host_bounds( const rmm::device_uvector& variable_bounds, const raft::handle_t* handle_ptr) { - rmm::device_uvector var_lb(variable_bounds.size(), handle_ptr->get_stream()); - rmm::device_uvector var_ub(variable_bounds.size(), handle_ptr->get_stream()); + auto stream = handle_ptr->get_stream(); + rmm::device_uvector var_lb(variable_bounds.size(), stream); + rmm::device_uvector var_ub(variable_bounds.size(), stream); thrust::transform( handle_ptr->get_thrust_policy(), variable_bounds.begin(), @@ -378,8 +349,8 @@ std::tuple, std::vector> extract_host_bounds( thrust::make_zip_iterator(thrust::make_tuple(var_lb.begin(), var_ub.begin())), [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); handle_ptr->sync_stream(); - auto h_var_lb = cuopt::host_copy(var_lb); - auto h_var_ub = cuopt::host_copy(var_ub); + auto h_var_lb = cuopt::host_copy(var_lb, stream); + auto h_var_ub = cuopt::host_copy(var_ub, stream); return std::make_tuple(h_var_lb, 
h_var_ub); } diff --git a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu index 18d49e3b8..9f73c5035 100644 --- a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu +++ b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -127,16 +127,17 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping); + auto stream = handle_.get_stream(); std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream); std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream); std::vector h_current_ATY = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream); std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream); std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream); 
EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -151,13 +152,13 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream); std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream); std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream); std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream); EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); @@ -227,16 +228,17 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping); + auto stream = handle_.get_stream(); std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream); std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream); std::vector h_current_ATY 
= - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream); std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream); std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream); EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -251,13 +253,13 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream); std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream); std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream); std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream); EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); diff --git 
a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh index de3e82fdb..11dbdba4b 100644 --- a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh +++ b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -38,7 +38,7 @@ static void test_objective_sanity( double objective_value, double epsilon = tolerance) { - const auto primal_vars = host_copy(primal_solution); + const auto primal_vars = host_copy(primal_solution, primal_solution.stream()); const auto& c_vector = op_problem.get_objective_coefficients(); std::vector out(primal_vars.size()); std::transform(primal_vars.cbegin(), @@ -62,7 +62,8 @@ static void test_constraint_sanity( double epsilon = tolerance, bool presolve_enabled = false) { - const std::vector primal_vars = host_copy(solution.get_primal_solution()); + const std::vector primal_vars = + host_copy(solution.get_primal_solution(), solution.get_primal_solution().stream()); const std::vector& values = op_problem.get_constraint_matrix_values(); const std::vector& indices = op_problem.get_constraint_matrix_indices(); const std::vector& offsets = op_problem.get_constraint_matrix_offsets(); diff --git a/cpp/tests/mip/elim_var_remap_test.cu b/cpp/tests/mip/elim_var_remap_test.cu index 2b2f3f576..e1d66ac21 100644 --- a/cpp/tests/mip/elim_var_remap_test.cu +++ b/cpp/tests/mip/elim_var_remap_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -120,8 +120,8 @@ void test_elim_var_remap(std::string test_instance) sub_problem.post_process_solution(sol); - auto golden_full_assignment = host_copy(full_assignment); - auto fixed_sub_problem_assignment = host_copy(sol.assignment); + auto golden_full_assignment = host_copy(full_assignment, handle_.get_stream()); + auto fixed_sub_problem_assignment = host_copy(sol.assignment, handle_.get_stream()); EXPECT_EQ(op_problem.get_n_variables(), fixed_sub_problem_assignment.size()); diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 20f359fcb..019585d90 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -49,8 +49,9 @@ std::tuple, std::vector, std::vector> select_k_ auto seed = std::random_device{}(); std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); - auto v_bnd = host_copy(problem.variable_bounds); - auto int_var_id = host_copy(problem.integer_indices); + auto stream = problem.handle_ptr->get_stream(); + auto v_bnd = host_copy(problem.variable_bounds, stream); + auto int_var_id = host_copy(problem.integer_indices, stream); int_var_id.erase( std::remove_if( int_var_id.begin(), @@ -106,10 +107,11 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, bnd_prb_1.solve(problem, probe_second); bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1)); - auto h_lb_0 = host_copy(b_lb_0); - auto h_ub_0 = host_copy(b_ub_0); - auto h_lb_1 = host_copy(b_lb_1); - auto h_ub_1 = host_copy(b_ub_1); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb_0 = host_copy(b_lb_0, stream); + auto h_ub_0 = host_copy(b_ub_0, stream); + auto h_lb_1 = host_copy(b_lb_1, stream); + auto h_ub_1 = host_copy(b_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -151,12 +153,13 @@ void test_multi_probe(std::string path) rmm::device_uvector b_ub(problem.n_variables, problem.handle_ptr->get_stream()); bnd_prb.set_updated_bounds(problem.handle_ptr, make_span(b_lb), make_span(b_ub)); - auto h_lb = host_copy(b_lb); - auto h_ub = host_copy(b_ub); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb = host_copy(b_lb, stream); + auto h_ub = host_copy(b_ub, stream); lb_prs.solve(probe_first); - auto bnds = host_copy(lb_prs.vars_bnd); + auto bnds = host_copy(lb_prs.vars_bnd, stream); for (int i = 0; i < (int)h_lb.size(); ++i) { EXPECT_DOUBLE_EQ(bnds[2 * i], h_lb[i]); EXPECT_DOUBLE_EQ(bnds[2 * i + 1], h_ub[i]); diff --git a/cpp/tests/mip/mip_utils.cuh b/cpp/tests/mip/mip_utils.cuh index 
7d5683998..19c44b2fd 100644 --- a/cpp/tests/mip/mip_utils.cuh +++ b/cpp/tests/mip/mip_utils.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -21,7 +21,7 @@ static void test_variable_bounds( { const double* lower_bound_ptr = problem.get_variable_lower_bounds().data(); const double* upper_bound_ptr = problem.get_variable_upper_bounds().data(); - auto host_assignment = cuopt::host_copy(solution); + auto host_assignment = cuopt::host_copy(solution, solution.stream()); double* assignment_ptr = host_assignment.data(); cuopt_assert(host_assignment.size() == problem.get_variable_lower_bounds().size(), ""); cuopt_assert(host_assignment.size() == problem.get_variable_upper_bounds().size(), ""); @@ -81,7 +81,7 @@ static void test_constraint_sanity_per_row( const std::vector& variable_upper_bounds = op_problem.get_variable_upper_bounds(); std::vector residual(constraint_lower_bounds.size(), 0.0); std::vector viol(constraint_lower_bounds.size(), 0.0); - auto h_solution = cuopt::host_copy(solution); + auto h_solution = cuopt::host_copy(solution, solution.stream()); // CSR SpMV for (size_t i = 0; i < offsets.size() - 1; ++i) { for (int j = offsets[i]; j < offsets[i + 1]; ++j) { diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index cb960425f..9a933c054 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -49,7 +49,7 @@ std::tuple, std::vector, std::vector> select_k_ std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); auto [v_lb, v_ub] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); - auto int_var_id = host_copy(problem.integer_indices); + auto int_var_id = host_copy(problem.integer_indices, problem.handle_ptr->get_stream()); int_var_id.erase( std::remove_if(int_var_id.begin(), int_var_id.end(), @@ -106,10 +106,11 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, bnd_prb_1.solve(problem, probe_second); bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1)); - auto h_lb_0 = host_copy(b_lb_0); - auto h_ub_0 = host_copy(b_ub_0); - auto h_lb_1 = host_copy(b_lb_1); - auto h_ub_1 = host_copy(b_ub_1); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb_0 = host_copy(b_lb_0, stream); + auto h_ub_0 = host_copy(b_ub_0, stream); + auto h_lb_1 = host_copy(b_lb_1, stream); + auto h_ub_1 = host_copy(b_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -121,17 +122,18 @@ multi_probe_results( const std::tuple, std::vector, std::vector>& probe_tuple) { prb.solve(problem, probe_tuple); - rmm::device_uvector m_lb_0(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_ub_0(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_lb_1(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_ub_1(problem.n_variables, problem.handle_ptr->get_stream()); + auto stream = problem.handle_ptr->get_stream(); + rmm::device_uvector m_lb_0(problem.n_variables, stream); + rmm::device_uvector m_ub_0(problem.n_variables, stream); + rmm::device_uvector m_lb_1(problem.n_variables, stream); + rmm::device_uvector m_ub_1(problem.n_variables, stream); prb.set_updated_bounds(problem.handle_ptr, 
make_span(m_lb_0), make_span(m_ub_0), 0); prb.set_updated_bounds(problem.handle_ptr, make_span(m_lb_1), make_span(m_ub_1), 1); - auto h_lb_0 = host_copy(m_lb_0); - auto h_ub_0 = host_copy(m_ub_0); - auto h_lb_1 = host_copy(m_lb_1); - auto h_ub_1 = host_copy(m_ub_1); + auto h_lb_0 = host_copy(m_lb_0, stream); + auto h_ub_0 = host_copy(m_ub_0, stream); + auto h_lb_1 = host_copy(m_lb_1, stream); + auto h_ub_1 = host_copy(m_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -170,15 +172,16 @@ void test_multi_probe(std::string path) auto [m_lb_0, m_ub_0, m_lb_1, m_ub_1] = multi_probe_results(multi_probe_prs, problem, probe_tuple); - auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity); - auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity); - auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity); - auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity); + auto stream = problem.handle_ptr->get_stream(); + auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity, stream); + auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity, stream); + auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity, stream); + auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity, stream); - auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity); - auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity); - auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity); - auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity); + auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity, stream); + auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity, stream); + auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity, stream); + auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity, stream); for (int i = 0; i < (int)bnd_min_act_0.size(); ++i) { EXPECT_DOUBLE_EQ(bnd_min_act_0[i], mlp_min_act_0[i]); diff --git 
a/cpp/tests/mip/presolve_test.cu b/cpp/tests/mip/presolve_test.cu index d27dd1db9..893602e20 100644 --- a/cpp/tests/mip/presolve_test.cu +++ b/cpp/tests/mip/presolve_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -44,7 +44,7 @@ TEST(problem, find_implied_integers) auto problem = detail::problem_t(result->reduced_problem); problem.set_implied_integers(result->implied_integer_indices); ASSERT_TRUE(result->implied_integer_indices.size() > 0); - auto var_types = host_copy(problem.variable_types); + auto var_types = host_copy(problem.variable_types, handle_.get_stream()); // Find the index of the one continuous variable auto it = std::find_if(var_types.begin(), var_types.end(), [](var_t var_type) { return var_type == var_t::CONTINUOUS; diff --git a/cpp/tests/mip/problem_test.cu b/cpp/tests/mip/problem_test.cu index 7113e265b..f884fda10 100644 --- a/cpp/tests/mip/problem_test.cu +++ b/cpp/tests/mip/problem_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -234,8 +234,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) fill_problem(op_problem); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 1.0); EXPECT_EQ(constraints_upper_bounds[0], 1.0); @@ -252,8 +254,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) op_problem.set_constraint_upper_bounds(upper, 1); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); EXPECT_EQ(constraints_upper_bounds[0], 3.0); } @@ -270,8 +274,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) fill_problem(op_problem); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); 
EXPECT_EQ(constraints_upper_bounds[0], 3.0); } diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index eb7e4bb3b..f9d76611d 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -180,7 +180,7 @@ TEST(LPTest, TestSampleLP2) ASSERT_EQ(result.get_primal_solution().size(), 1); // Copy solution to host to access values - auto primal_host = cuopt::host_copy(result.get_primal_solution()); + auto primal_host = cuopt::host_copy(result.get_primal_solution(), handle.get_stream()); EXPECT_NEAR(primal_host[0], 0.0, 1e-6); EXPECT_NEAR(result.get_additional_termination_information().primal_objective, 0.0, 1e-6); diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu index b18150899..e27c16624 100644 --- a/cpp/tests/qp/unit_tests/two_variable_test.cu +++ b/cpp/tests/qp/unit_tests/two_variable_test.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights * reserved. 
SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -65,7 +65,7 @@ TEST(two_variable_test, simple_test) cuopt::linear_programming::pdlp_termination_status_t::Optimal); EXPECT_NEAR(solution.get_objective_value(), -32.0, 1e-6); - auto sol_vec = cuopt::host_copy(solution.get_primal_solution()); + auto sol_vec = cuopt::host_copy(solution.get_primal_solution(), handle.get_stream()); EXPECT_NEAR(sol_vec[0], 4.0, 1e-6); EXPECT_NEAR(sol_vec[1], 2.0, 1e-6); } diff --git a/cpp/tests/routing/CMakeLists.txt b/cpp/tests/routing/CMakeLists.txt index 0ee757c3e..99cfdb9de 100644 --- a/cpp/tests/routing/CMakeLists.txt +++ b/cpp/tests/routing/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -32,4 +32,5 @@ ConfigureTest(ROUTING_UNIT_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/prize_collection.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/objective_function.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/top_k.cu + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/batch_tsp.cu ) diff --git a/cpp/tests/routing/routing_test.cuh b/cpp/tests/routing/routing_test.cuh index 31ad78332..cdafbbf1f 100644 --- a/cpp/tests/routing/routing_test.cuh +++ b/cpp/tests/routing/routing_test.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -482,7 +482,7 @@ class base_test_t { vehicle_max_times_d.data(), vehicle_max_times_d.size(), stream_view_); - fleet_order_constraints_h = fleet_order_constraints_d.to_host(); + fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_); } void check_time_windows(host_assignment_t const& routing_solution, bool is_soft_tw = false) @@ -492,7 +492,7 @@ class base_test_t { auto truck_id = routing_solution.truck_id; auto locations = routing_solution.locations; auto node_types = routing_solution.node_types; - fleet_order_constraints_h = fleet_order_constraints_d.to_host(); + fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_); std::vector temp_truck_ids(truck_id); auto end_it = std::unique(temp_truck_ids.begin(), temp_truck_ids.end()); diff --git a/cpp/tests/routing/unit_tests/batch_tsp.cu b/cpp/tests/routing/unit_tests/batch_tsp.cu new file mode 100644 index 000000000..01ae36e70 --- /dev/null +++ b/cpp/tests/routing/unit_tests/batch_tsp.cu @@ -0,0 +1,89 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include + +#include + +#include + +#include + +namespace cuopt { +namespace routing { +namespace test { + +using i_t = int; +using f_t = float; + +/** + * @brief Creates a small symmetric cost matrix for TSP + * @param n_locations Number of locations + * @return Cost matrix as a flattened vector + */ +std::vector create_small_tsp_cost_matrix(i_t n_locations) +{ + std::vector cost_matrix(n_locations * n_locations, 0.0f); + + // Create a simple distance matrix based on coordinates on a line + for (i_t i = 0; i < n_locations; ++i) { + for (i_t j = 0; j < n_locations; ++j) { + cost_matrix[i * n_locations + j] = static_cast(std::abs(i - j)); + } + } + return cost_matrix; +} + +/** + * @brief Test running TSPs of varying sizes in parallel using call_batch_solve API + */ +TEST(batch_tsp, varying_sizes) +{ + std::vector tsp_sizes = {5, 8, 10, 6, 7, 9}; + const i_t n_problems = static_cast(tsp_sizes.size()); + + // Create handles and cost matrices for each problem + std::vector> handles; + std::vector> cost_matrices_d; + std::vector>> data_models; + std::vector*> data_model_ptrs; + + for (i_t i = 0; i < n_problems; ++i) { + handles.push_back(std::make_unique()); + auto& handle = *handles.back(); + + auto cost_matrix_h = create_small_tsp_cost_matrix(tsp_sizes[i]); + cost_matrices_d.push_back(cuopt::device_copy(cost_matrix_h, handle.get_stream())); + + data_models.push_back(std::make_unique>( + &handle, tsp_sizes[i], 1, tsp_sizes[i])); + data_models.back()->add_cost_matrix(cost_matrices_d.back().data()); + data_model_ptrs.push_back(data_models.back().get()); + } + + // Configure solver settings + cuopt::routing::solver_settings_t settings; + settings.set_time_limit(5); + + // Call batch solve + auto solutions = cuopt::cython::call_batch_solve(data_model_ptrs, &settings); + + // Verify all solutions + ASSERT_EQ(solutions.size(), n_problems); + for (i_t i = 0; i < n_problems; ++i) { + 
EXPECT_EQ(solutions[i]->status_, cuopt::routing::solution_status_t::SUCCESS) + << "TSP " << i << " (size " << tsp_sizes[i] << ") failed"; + EXPECT_EQ(solutions[i]->vehicle_count_, 1) + << "TSP " << i << " (size " << tsp_sizes[i] << ") used multiple vehicles"; + } +} + +} // namespace test +} // namespace routing +} // namespace cuopt diff --git a/cpp/tests/routing/unit_tests/vehicle_order_match.cu b/cpp/tests/routing/unit_tests/vehicle_order_match.cu index ba7dbbf0d..22691b3b8 100644 --- a/cpp/tests/routing/unit_tests/vehicle_order_match.cu +++ b/cpp/tests/routing/unit_tests/vehicle_order_match.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -48,8 +48,9 @@ TEST(vehicle_order_match, two_vehicle_four_orders) EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS); - auto route_id = cuopt::host_copy(routing_solution.get_route()); - auto truck_id = cuopt::host_copy(routing_solution.get_truck_id()); + auto stream = handle.get_stream(); + auto route_id = cuopt::host_copy(routing_solution.get_route(), stream); + auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); for (size_t i = 0; i < route_id.size(); ++i) { if (route_id[i] == 3 || route_id[i] == 1) { EXPECT_EQ(truck_id[i], 0); } } @@ -71,12 +72,13 @@ TEST(vehicle_order_match, one_order_per_vehicle) raft::handle_t handle; cuopt::routing::data_model_view_t data_model(&handle, n_locations, n_vehicles); - auto time_mat_d = cuopt::device_copy(time_mat, handle.get_stream()); + auto stream = handle.get_stream(); + auto time_mat_d = cuopt::device_copy(time_mat, stream); data_model.add_cost_matrix(time_mat_d.data()); std::unordered_map> vehicle_order_match_d; for (const auto& [id, orders] : 
vehicle_order_match) { - vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, handle.get_stream())); + vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, stream)); } for (const auto& [id, orders] : vehicle_order_match_d) { @@ -87,8 +89,8 @@ TEST(vehicle_order_match, one_order_per_vehicle) EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS); - auto route_id = cuopt::host_copy(routing_solution.get_route()); - auto truck_id = cuopt::host_copy(routing_solution.get_truck_id()); + auto route_id = cuopt::host_copy(routing_solution.get_route(), stream); + auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); for (size_t i = 0; i < route_id.size(); ++i) { auto order = route_id[i]; auto vehicle = truck_id[i]; diff --git a/python/cuopt/cuopt/routing/__init__.py b/python/cuopt/cuopt/routing/__init__.py index efa61b477..081d58f99 100644 --- a/python/cuopt/cuopt/routing/__init__.py +++ b/python/cuopt/cuopt/routing/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 from cuopt.routing.assignment import Assignment, SolutionStatus @@ -9,5 +9,5 @@ update_routes_and_vehicles, ) from cuopt.routing.utils_wrapper import DatasetDistribution -from cuopt.routing.vehicle_routing import DataModel, Solve, SolverSettings +from cuopt.routing.vehicle_routing import BatchSolve, DataModel, Solve, SolverSettings from cuopt.routing.vehicle_routing_wrapper import ErrorStatus, Objective diff --git a/python/cuopt/cuopt/routing/vehicle_routing.pxd b/python/cuopt/cuopt/routing/vehicle_routing.pxd index 4638f8ae7..7f89d33ff 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing.pxd +++ b/python/cuopt/cuopt/routing/vehicle_routing.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,7 @@ from libcpp cimport bool from libcpp.string cimport string +from libcpp.vector cimport vector from pylibraft.common.handle cimport * @@ -133,3 +134,8 @@ cdef extern from "cuopt/routing/cython/cython.hpp" namespace "cuopt::cython": # data_model_view_t[int, float]* data_model, solver_settings_t[int, float]* solver_settings ) except + + + cdef vector[unique_ptr[vehicle_routing_ret_t]] call_batch_solve( + vector[data_model_view_t[int, float] *] data_models, + solver_settings_t[int, float]* solver_settings + ) except + diff --git a/python/cuopt/cuopt/routing/vehicle_routing.py b/python/cuopt/cuopt/routing/vehicle_routing.py index 365709147..990283667 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing.py +++ b/python/cuopt/cuopt/routing/vehicle_routing.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import numpy as np @@ -1540,3 +1540,52 @@ def Solve(data_model, solver_settings=None): solver_settings.get_config_file_name(), ) return solution + + +@catch_cuopt_exception +def BatchSolve(data_model_list, solver_settings=None): + """ + Solves multiple routing problems in batch mode using parallel execution. + + Parameters + ---------- + data_model_list: list of DataModel + List of data model objects representing routing problems to solve. + solver_settings: SolverSettings + Settings to configure solver configurations. + By default, it uses default solver settings to solve. + + Returns + ------- + tuple + A tuple containing: + - list of Assignment: Solutions for each routing problem + + Examples + -------- + >>> from cuopt import routing + >>> import cudf + >>> # Create multiple data models + >>> data_models = [] + >>> for i in range(5): + ... cost_matrix = cudf.DataFrame([[0, 1, 2], [1, 0, 3], [2, 3, 0]]) + ... dm = routing.DataModel(3, 1) + ... dm.add_cost_matrix(cost_matrix) + ... 
data_models.append(dm) + >>> settings = routing.SolverSettings() + >>> settings.set_time_limit(1.0) + >>> solutions, solve_time = routing.BatchSolve(data_models, settings) + """ + + if not isinstance(data_model_list, list): + raise ValueError("data_model_list must be a list of DataModel objects") + if len(data_model_list) == 0: + raise ValueError("data_model_list cannot be empty") + if not all(isinstance(dm, DataModel) for dm in data_model_list): + raise ValueError( + "All elements in data_model_list must be DataModel instances" + ) + if solver_settings is None: + solver_settings = SolverSettings() + + return vehicle_routing_wrapper.BatchSolve(data_model_list, solver_settings) diff --git a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx index bf4a2570c..c1d4bd01a 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx +++ b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# noqa # SPDX-License-Identifier: Apache-2.0 @@ -11,6 +11,7 @@ from pylibraft.common.handle cimport * from cuopt.routing.structure.routing_utilities cimport * from cuopt.routing.vehicle_routing cimport ( + call_batch_solve, call_solve, data_model_view_t, node_type_t, @@ -32,8 +33,10 @@ from libc.stdlib cimport free, malloc from libc.string cimport memcpy, strcpy, strlen from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.utility cimport move +from libcpp.vector cimport vector from rmm.pylibrmm.device_buffer cimport DeviceBuffer @@ -834,3 +837,126 @@ def Solve(DataModel data_model, SolverSettings solver_settings): error_message, unserviced_nodes ) + + +cdef create_assignment_from_vr_ret(vehicle_routing_ret_t& vr_ret): + """Build an Assignment from vr_ret, moving its device buffers out of it.""" + vehicle_count = vr_ret.vehicle_count_ + total_objective_value = vr_ret.total_objective_value_ + + objective_values = {} + for k in vr_ret.objective_values_: + obj = Objective(int(k.first)) + objective_values[obj] = k.second + + status = vr_ret.status_ + cdef char* c_sol_string = c_get_string(vr_ret.solution_string_) + try: + solver_status_string = \ + c_sol_string[:vr_ret.solution_string_.length()].decode('UTF-8') + finally: + free(c_sol_string) + + route = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_route_)) + route_locations = DeviceBuffer.c_from_unique_ptr( + move(vr_ret.d_route_locations_) + ) + arrival_stamp = DeviceBuffer.c_from_unique_ptr( + move(vr_ret.d_arrival_stamp_) + ) + truck_id = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_truck_id_)) + node_types = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_node_types_)) + unserviced_nodes_buf = \ + DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_unserviced_nodes_)) + accepted_buf = \ + DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_accepted_)) + + route_df = cudf.DataFrame() + route_df['route'] = series_from_buf(route, pa.int32()) + 
route_df['arrival_stamp'] = series_from_buf(arrival_stamp, pa.float64()) + route_df['truck_id'] = series_from_buf(truck_id, pa.int32()) + route_df['location'] = series_from_buf(route_locations, pa.int32()) + route_df['type'] = series_from_buf(node_types, pa.int32()) + + unserviced_nodes = cudf.Series._from_column( + series_from_buf(unserviced_nodes_buf, pa.int32()) + ) + accepted = cudf.Series._from_column( + series_from_buf(accepted_buf, pa.int32()) + ) + + def get_type_from_int(type_in_int): + if type_in_int == int(NodeType.DEPOT): + return "Depot" + elif type_in_int == int(NodeType.PICKUP): + return "Pickup" + elif type_in_int == int(NodeType.DELIVERY): + return "Delivery" + elif type_in_int == int(NodeType.BREAK): + return "Break" + + node_types_string = [ + get_type_from_int(type_in_int) + for type_in_int in route_df['type'].to_pandas()] + route_df['type'] = node_types_string + error_status = vr_ret.error_status_ + error_message = vr_ret.error_message_ + + return Assignment( + vehicle_count, + total_objective_value, + objective_values, + route_df, + accepted, + status, + solver_status_string, + error_status, + error_message, + unserviced_nodes + ) + + +def BatchSolve(py_data_model_list, SolverSettings solver_settings): + """ + Solve multiple routing problems in batch mode using parallel execution. + + Parameters + ---------- + py_data_model_list : list of DataModel + List of data model objects representing routing problems to solve. + solver_settings : SolverSettings + Solver settings to use for all problems. 
+ + Returns + ------- + list of Assignment + One Assignment per element of the vector returned by + call_batch_solve, in that vector's order. + No solve time is returned. + """ + cdef solver_settings_t[int, float]* c_solver_settings = ( + solver_settings.c_solver_settings.get() + ) + + cdef vector[data_model_view_t[int, float] *] data_model_views + + for data_model_obj in py_data_model_list: + data_model_views.push_back( + (data_model_obj).c_data_model_view.get() + ) + + cdef vector[unique_ptr[vehicle_routing_ret_t]] batch_solve_result = ( + move(call_batch_solve(data_model_views, c_solver_settings)) + ) + + cdef vector[unique_ptr[vehicle_routing_ret_t]] c_solutions = ( + move(batch_solve_result) + ) + + solutions = [] + for i in range(c_solutions.size()): + solutions.append( + create_assignment_from_vr_ret(c_solutions[i].get()[0]) + ) + + return solutions diff --git a/python/cuopt/cuopt/tests/routing/test_batch_solve.py b/python/cuopt/cuopt/tests/routing/test_batch_solve.py new file mode 100644 index 000000000..31d09c202 --- /dev/null +++ b/python/cuopt/cuopt/tests/routing/test_batch_solve.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import cudf +import numpy as np + +from cuopt import routing + + +def create_tsp_cost_matrix(n_locations): + """Return a cudf.DataFrame cost matrix with cost(i, j) = abs(i - j).""" + cost_matrix = np.zeros((n_locations, n_locations), dtype=np.float32) + for i in range(n_locations): + for j in range(n_locations): + cost_matrix[i, j] = abs(i - j) + return cudf.DataFrame(cost_matrix) + + +def test_batch_solve_varying_sizes(): + """Test batch solving TSPs of varying sizes.""" + tsp_sizes = [ + 5, + 8, + 10, + 6, + 7, + 9, + 12, + 15, + 11, + 4, + 13, + 14, + 8, + 6, + 10, + 9, + 7, + 11, + 5, + 12, + ] + + # Create data models for each TSP + data_models = [] + for n_locations in tsp_sizes: + cost_matrix = create_tsp_cost_matrix(n_locations) + dm = routing.DataModel(n_locations, 1) + dm.add_cost_matrix(cost_matrix) + data_models.append(dm) + + # Configure solver settings + settings = routing.SolverSettings() + settings.set_time_limit(5.0) + + # Call batch solve + solutions = routing.BatchSolve(data_models, settings) + + # Verify results + assert len(solutions) == len(tsp_sizes) + for i, solution in enumerate(solutions): + assert solution.get_status() == 0, ( + f"TSP {i} (size {tsp_sizes[i]}) failed" + ) + assert solution.get_vehicle_count() == 1, ( + f"TSP {i} (size {tsp_sizes[i]}) used multiple vehicles" + )