Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/include/cuopt/routing/cython/cython.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand All @@ -16,6 +16,7 @@
#include <raft/core/handle.hpp>

#include <memory>
#include <vector>

namespace cuopt {
namespace cython {
Expand Down Expand Up @@ -82,6 +83,10 @@ struct dataset_ret_t {
std::unique_ptr<vehicle_routing_ret_t> call_solve(routing::data_model_view_t<int, float>*,
routing::solver_settings_t<int, float>*);

// Wrapper for batch solve to expose the API to cython.
//
// Takes a vector of non-owning pointers to routing data model views (the vector
// is passed by value, so only the pointers are copied) together with a single
// pointer to solver settings, and returns a vector of uniquely-owned
// vehicle_routing_ret_t results.
// NOTE(review): presumably the same settings object is applied to every data
// model and the returned vector holds one result per input, in input order --
// confirm against the implementation.
std::vector<std::unique_ptr<vehicle_routing_ret_t>> call_batch_solve(
std::vector<routing::data_model_view_t<int, float>*>, routing::solver_settings_t<int, float>*);

// Wrapper for dataset to expose the API to cython.
//
// Takes the raft handle and the dataset generation parameters by const
// reference and returns the result as a std::unique_ptr<dataset_ret_t>, so
// ownership of the returned object passes to the caller.
std::unique_ptr<dataset_ret_t> call_generate_dataset(
raft::handle_t const& handle, routing::generator::dataset_params_t<int, float> const& params);
Expand Down
45 changes: 25 additions & 20 deletions cpp/src/linear_programming/optimization_problem.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -337,7 +337,7 @@ i_t optimization_problem_t<i_t, f_t>::get_n_integers() const
{
i_t n_integers = 0;
if (get_n_variables() != 0) {
auto enum_variable_types = cuopt::host_copy(get_variable_types());
auto enum_variable_types = cuopt::host_copy(get_variable_types(), handle_ptr_->get_stream());

for (size_t i = 0; i < enum_variable_types.size(); ++i) {
if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; }
Expand Down Expand Up @@ -591,16 +591,17 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
data_model_view.set_maximize(get_sense());

// Copy to host
auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values());
auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices());
auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets());
auto constraint_bounds = cuopt::host_copy(get_constraint_bounds());
auto objective_coefficients = cuopt::host_copy(get_objective_coefficients());
auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds());
auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds());
auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds());
auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds());
auto row_types = cuopt::host_copy(get_row_types());
auto stream = handle_ptr_->get_stream();
auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values(), stream);
auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices(), stream);
auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets(), stream);
auto constraint_bounds = cuopt::host_copy(get_constraint_bounds(), stream);
auto objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream);
auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream);
auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream);
auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds(), stream);
auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds(), stream);
auto row_types = cuopt::host_copy(get_row_types(), stream);

// Set constraint matrix in CSR format
if (get_nnz() != 0) {
Expand Down Expand Up @@ -652,7 +653,7 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
std::vector<char> variable_types(get_n_variables());
// Set variable types (convert from enum to char)
if (get_n_variables() != 0) {
auto enum_variable_types = cuopt::host_copy(get_variable_types());
auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream);

// Convert enum types to char types
for (size_t i = 0; i < variable_types.size(); ++i) {
Expand All @@ -677,13 +678,17 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
template <typename i_t, typename f_t>
void optimization_problem_t<i_t, f_t>::print_scaling_information() const
{
std::vector<f_t> constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values());
std::vector<f_t> constraint_rhs = cuopt::host_copy(get_constraint_bounds());
std::vector<f_t> objective_coefficients = cuopt::host_copy(get_objective_coefficients());
std::vector<f_t> variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds());
std::vector<f_t> variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds());
std::vector<f_t> constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds());
std::vector<f_t> constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds());
auto stream = handle_ptr_->get_stream();
std::vector<f_t> constraint_matrix_values =
cuopt::host_copy(get_constraint_matrix_values(), stream);
std::vector<f_t> constraint_rhs = cuopt::host_copy(get_constraint_bounds(), stream);
std::vector<f_t> objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream);
std::vector<f_t> variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream);
std::vector<f_t> variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream);
std::vector<f_t> constraint_lower_bounds =
cuopt::host_copy(get_constraint_lower_bounds(), stream);
std::vector<f_t> constraint_upper_bounds =
cuopt::host_copy(get_constraint_upper_bounds(), stream);

auto findMaxAbs = [](const std::vector<f_t>& vec) -> f_t {
if (vec.empty()) { return 0.0; }
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/linear_programming/translate.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -116,22 +116,23 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,
{
CUOPT_LOG_DEBUG("Starting translation");

std::vector<f_t> pdlp_objective = cuopt::host_copy(problem.objective_coefficients);
auto stream = problem.handle_ptr->get_stream();
std::vector<f_t> pdlp_objective = cuopt::host_copy(problem.objective_coefficients, stream);

dual_simplex::csr_matrix_t<i_t, f_t> csr_A(
problem.n_constraints, problem.n_variables, problem.nnz);
csr_A.x = cuopt::host_copy(problem.coefficients);
csr_A.j = cuopt::host_copy(problem.variables);
csr_A.row_start = cuopt::host_copy(problem.offsets);
csr_A.x = cuopt::host_copy(problem.coefficients, stream);
csr_A.j = cuopt::host_copy(problem.variables, stream);
csr_A.row_start = cuopt::host_copy(problem.offsets, stream);

problem.handle_ptr->get_stream().synchronize();
stream.synchronize();
CUOPT_LOG_DEBUG("Converting to compressed column");
csr_A.to_compressed_col(lp.A);
CUOPT_LOG_DEBUG("Converted to compressed column");

std::vector<f_t> slack(problem.n_constraints);
std::vector<f_t> tmp_x = cuopt::host_copy(sol.get_primal_solution());
problem.handle_ptr->get_stream().synchronize();
std::vector<f_t> tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream);
stream.synchronize();
dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack);
CUOPT_LOG_DEBUG("Multiplied A and x");

Expand Down Expand Up @@ -161,8 +162,8 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,

auto [lower, upper] = extract_host_bounds<f_t>(problem.variable_bounds, problem.handle_ptr);

std::vector<f_t> constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds);
std::vector<f_t> constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds);
std::vector<f_t> constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds, stream);
std::vector<f_t> constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds, stream);

lp.objective.resize(n, 0.0);
std::copy(
Expand All @@ -187,10 +188,10 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,
if (initial_solution.x[j] > lp.upper[j]) { initial_solution.x[j] = lp.upper[j]; }
}
CUOPT_LOG_DEBUG("Finished with x");
initial_solution.y = cuopt::host_copy(sol.get_dual_solution());
initial_solution.y = cuopt::host_copy(sol.get_dual_solution(), stream);

std::vector<f_t> tmp_z = cuopt::host_copy(sol.get_reduced_cost());
problem.handle_ptr->get_stream().synchronize();
std::vector<f_t> tmp_z = cuopt::host_copy(sol.get_reduced_cost(), stream);
stream.synchronize();
std::copy(tmp_z.begin(), tmp_z.begin() + problem.n_variables, initial_solution.z.begin());
for (i_t j = problem.n_variables; j < n; ++j) {
initial_solution.z[j] = initial_solution.y[j - problem.n_variables];
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/mip/diversity/lns/rins.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights
* reserved. SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -221,7 +221,7 @@ void rins_t<i_t, f_t>::run_rins()
&rins_handle, &fixed_problem, context.settings, context.scaling);
fj_t<i_t, f_t> fj(fj_context);
solution_t<i_t, f_t> fj_solution(fixed_problem);
fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment));
fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream()));
std::vector<f_t> default_weights(fixed_problem.n_constraints, 1.);
cpu_fj_thread_t<i_t, f_t> cpu_fj_thread;
cpu_fj_thread.fj_cpu = fj.create_cpu_climber(fj_solution,
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -190,7 +190,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
probing_values,
n_vars_from_other,
variable_map);
probing_config.probing_values = host_copy(probing_values);
probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream());
probing_config.n_of_fixed_from_first = fixed_from_guiding;
probing_config.n_of_fixed_from_second = fixed_from_other;
probing_config.use_balanced_probing = true;
Expand All @@ -214,7 +214,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit);
get_probing_values_for_infeasible(
guiding_solution, other_solution, offspring, probing_values, n_vars_from_other);
probing_config.probing_values = host_copy(probing_values);
probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream());
constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config);
}
constraint_prop.max_n_failed_repair_iterations = 1;
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/mip/local_search/local_search.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -110,7 +110,8 @@ void local_search_t<i_t, f_t>::start_cpufj_lptopt_scratch_threads(
std::vector<f_t> default_weights(context.problem_ptr->n_constraints, 1.);

solution_t<i_t, f_t> solution_lp(*context.problem_ptr);
solution_lp.copy_new_assignment(host_copy(lp_optimal_solution));
solution_lp.copy_new_assignment(
host_copy(lp_optimal_solution, context.problem_ptr->handle_ptr->get_stream()));
solution_lp.round_random_nearest(500);
scratch_cpu_fj_on_lp_opt.fj_cpu = fj.create_cpu_climber(
solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_);
Expand Down
24 changes: 13 additions & 11 deletions cpp/src/mip/presolve/conditional_bound_strengthening.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -230,11 +230,12 @@ void conditional_bound_strengthening_t<i_t, f_t>::select_constraint_pairs_host(
#ifdef DEBUG_COND_BOUNDS_PROP
auto start_time = std::chrono::high_resolution_clock::now();
#endif
auto variables = cuopt::host_copy(problem.variables);
auto offsets = cuopt::host_copy(problem.offsets);
auto stream = problem.handle_ptr->get_stream();
auto variables = cuopt::host_copy(problem.variables, stream);
auto offsets = cuopt::host_copy(problem.offsets, stream);

auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints);
auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets);
auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints, stream);
auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets, stream);

std::vector<int2> constraint_pairs_h(max_pair_per_row * problem.n_constraints, {-1, -1});
std::unordered_set<int> cnstr_pair;
Expand Down Expand Up @@ -295,8 +296,8 @@ void conditional_bound_strengthening_t<i_t, f_t>::select_constraint_pairs_device
colsC,
valsC);
std::vector<int2> constraint_pairs_h;
offsets_h = cuopt::host_copy(offsetsC);
cols_h = cuopt::host_copy(colsC);
offsets_h = cuopt::host_copy(offsetsC, stream);
cols_h = cuopt::host_copy(colsC, stream);

constraint_pairs_h.reserve(max_pair_per_row * problem.n_constraints);
for (int i = 0; i < problem.n_constraints; ++i) {
Expand Down Expand Up @@ -654,8 +655,9 @@ void conditional_bound_strengthening_t<i_t, f_t>::solve(problem_t<i_t, f_t>& pro
raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size;

#ifdef DEBUG_COND_BOUNDS_PROP
auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds);
auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds);
auto debug_stream = problem.handle_ptr->get_stream();
auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream);
auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream);

auto start_time = std::chrono::high_resolution_clock::now();
#endif
Expand All @@ -674,8 +676,8 @@ void conditional_bound_strengthening_t<i_t, f_t>::solve(problem_t<i_t, f_t>& pro
double time_for_presolve =
std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();

auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds);
auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds);
auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream);
auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream);

int num_improvements = 0;
int num_new_equality = 0;
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/mip/presolve/lb_probing_cache.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -302,7 +302,7 @@ inline std::vector<i_t> compute_prioritized_integer_indices(
}
return false;
});
auto h_priority_indices = host_copy(priority_indices);
auto h_priority_indices = host_copy(priority_indices, problem.pb->handle_ptr->get_stream());
return h_priority_indices;
}

Expand All @@ -315,9 +315,10 @@ void compute_probing_cache(load_balanced_bounds_presolve_t<i_t, f_t>& bound_pres
auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem);
// std::cout<<"priority_indices\n";
CUOPT_LOG_DEBUG("Computing probing cache");
auto h_integer_indices = host_copy(problem.pb->integer_indices);
auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds);
auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds);
auto stream = problem.pb->handle_ptr->get_stream();
auto h_integer_indices = host_copy(problem.pb->integer_indices, stream);
auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds, stream);
auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds, stream);
size_t n_of_cached_probings = 0;
// TODO adjust the iteration limit depending on the total time limit and time it takes for single
// var
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/mip/presolve/load_balanced_partition_helpers.cuh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -181,7 +181,7 @@ class log_dist_t {
log_dist_t() = default;

log_dist_t(rmm::device_uvector<i_t>& vertex_id, rmm::device_uvector<i_t>& bin_offsets)
: vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets))
: vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets, bin_offsets.stream()))
{
// If bin_offsets_ is smaller than NumberBins<i_t> then resize it
// so that the last element is repeated
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/mip/presolve/probing_cache.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* clang-format off */
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/* clang-format on */
Expand Down Expand Up @@ -346,7 +346,7 @@ inline std::vector<i_t> compute_prioritized_integer_indices(
}
return false;
});
auto h_priority_indices = host_copy(priority_indices);
auto h_priority_indices = host_copy(priority_indices, problem.handle_ptr->get_stream());
problem.handle_ptr->sync_stream();
return h_priority_indices;
}
Expand Down Expand Up @@ -461,8 +461,9 @@ void compute_probing_cache(bound_presolve_t<i_t, f_t>& bound_presolve,
// we dont want to compute the probing cache for all variables for time and computation resources
auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem);
CUOPT_LOG_DEBUG("Computing probing cache");
auto h_integer_indices = host_copy(problem.integer_indices);
const auto h_var_bounds = host_copy(problem.variable_bounds);
auto stream = problem.handle_ptr->get_stream();
auto h_integer_indices = host_copy(problem.integer_indices, stream);
const auto h_var_bounds = host_copy(problem.variable_bounds, stream);
// TODO adjust the iteration limit depending on the total time limit and time it takes for single
// var
bound_presolve.settings.iteration_limit = 50;
Expand Down
Loading
Loading