diff --git a/cpp/include/cuopt/routing/cython/cython.hpp b/cpp/include/cuopt/routing/cython/cython.hpp
index 75898ece7..93fdedc78 100644
--- a/cpp/include/cuopt/routing/cython/cython.hpp
+++ b/cpp/include/cuopt/routing/cython/cython.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -16,6 +16,7 @@
 #include <raft/core/handle.hpp>
 
 #include <memory>
+#include <vector>
 
 namespace cuopt {
 namespace cython {
@@ -82,6 +83,10 @@ struct dataset_ret_t {
 std::unique_ptr<vehicle_routing_ret_t> call_solve(routing::data_model_view_t<int, float>*,
                                                   routing::solver_settings_t<int, float>*);
 
+// Wrapper exposing batch solve to cython: several data model views, one settings object.
+std::vector<std::unique_ptr<vehicle_routing_ret_t>> call_batch_solve(
+  std::vector<routing::data_model_view_t<int, float>*>, routing::solver_settings_t<int, float>*);
+
 // Wrapper for dataset to expose the API to cython.
 std::unique_ptr<dataset_ret_t> call_generate_dataset(
   raft::handle_t const& handle, routing::generator::dataset_params_t<int, float> const& params);
diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu
index 72d75cdc7..3b4cdc2f8 100644
--- a/cpp/src/linear_programming/optimization_problem.cu
+++ b/cpp/src/linear_programming/optimization_problem.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -337,7 +337,7 @@ i_t optimization_problem_t<i_t, f_t>::get_n_integers() const
 {
   i_t n_integers = 0;
   if (get_n_variables() != 0) {
-    auto enum_variable_types = cuopt::host_copy(get_variable_types());
+    auto enum_variable_types = cuopt::host_copy(get_variable_types(), handle_ptr_->get_stream());
 
     for (size_t i = 0; i < enum_variable_types.size(); ++i) {
       if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; }
@@ -591,16 +591,17 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
   data_model_view.set_maximize(get_sense());
 
   // Copy to host
-  auto constraint_matrix_values  = cuopt::host_copy(get_constraint_matrix_values());
-  auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices());
-  auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets());
-  auto constraint_bounds         = cuopt::host_copy(get_constraint_bounds());
-  auto objective_coefficients    = cuopt::host_copy(get_objective_coefficients());
-  auto variable_lower_bounds     = cuopt::host_copy(get_variable_lower_bounds());
-  auto variable_upper_bounds     = cuopt::host_copy(get_variable_upper_bounds());
-  auto constraint_lower_bounds   = cuopt::host_copy(get_constraint_lower_bounds());
-  auto constraint_upper_bounds   = cuopt::host_copy(get_constraint_upper_bounds());
-  auto row_types                 = cuopt::host_copy(get_row_types());
+  auto stream                    = handle_ptr_->get_stream();
+  auto constraint_matrix_values  = cuopt::host_copy(get_constraint_matrix_values(), stream);
+  auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices(), stream);
+  auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets(), stream);
+  auto constraint_bounds         = cuopt::host_copy(get_constraint_bounds(), stream);
+  auto objective_coefficients    = cuopt::host_copy(get_objective_coefficients(), stream);
+  auto variable_lower_bounds     = cuopt::host_copy(get_variable_lower_bounds(), stream);
+  auto variable_upper_bounds     = cuopt::host_copy(get_variable_upper_bounds(), stream);
+  auto constraint_lower_bounds   = cuopt::host_copy(get_constraint_lower_bounds(), stream);
+  auto constraint_upper_bounds   = cuopt::host_copy(get_constraint_upper_bounds(), stream);
+  auto row_types                 = cuopt::host_copy(get_row_types(), stream);
 
   // Set constraint matrix in CSR format
   if (get_nnz() != 0) {
@@ -652,7 +653,7 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
   std::vector<char> variable_types(get_n_variables());
   // Set variable types (convert from enum to char)
   if (get_n_variables() != 0) {
-    auto enum_variable_types = cuopt::host_copy(get_variable_types());
+    auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream);
 
     // Convert enum types to char types
     for (size_t i = 0; i < variable_types.size(); ++i) {
@@ -677,13 +678,17 @@ void optimization_problem_t<i_t, f_t>::write_to_mps(const std::string& mps_file_
 template <typename i_t, typename f_t>
 void optimization_problem_t<i_t, f_t>::print_scaling_information() const
 {
-  std::vector<f_t> constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values());
-  std::vector<f_t> constraint_rhs           = cuopt::host_copy(get_constraint_bounds());
-  std::vector<f_t> objective_coefficients   = cuopt::host_copy(get_objective_coefficients());
-  std::vector<f_t> variable_lower_bounds    = cuopt::host_copy(get_variable_lower_bounds());
-  std::vector<f_t> variable_upper_bounds    = cuopt::host_copy(get_variable_upper_bounds());
-  std::vector<f_t> constraint_lower_bounds  = cuopt::host_copy(get_constraint_lower_bounds());
-  std::vector<f_t> constraint_upper_bounds  = cuopt::host_copy(get_constraint_upper_bounds());
+  auto stream = handle_ptr_->get_stream();
+  std::vector<f_t> constraint_matrix_values =
+    cuopt::host_copy(get_constraint_matrix_values(), stream);
+  std::vector<f_t> constraint_rhs         = cuopt::host_copy(get_constraint_bounds(), stream);
+  std::vector<f_t> objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream);
+  std::vector<f_t> variable_lower_bounds  = cuopt::host_copy(get_variable_lower_bounds(), stream);
+  std::vector<f_t> variable_upper_bounds  = cuopt::host_copy(get_variable_upper_bounds(), stream);
+  std::vector<f_t> constraint_lower_bounds =
+    cuopt::host_copy(get_constraint_lower_bounds(), stream);
+  std::vector<f_t> constraint_upper_bounds =
+    cuopt::host_copy(get_constraint_upper_bounds(), stream);
 
   auto findMaxAbs = [](const std::vector<f_t>& vec) -> f_t {
     if (vec.empty()) { return 0.0; }
diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp
index 8453ac3e7..19f6c024c 100644
--- a/cpp/src/linear_programming/translate.hpp
+++ b/cpp/src/linear_programming/translate.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -116,22 +116,23 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,
 {
   CUOPT_LOG_DEBUG("Starting translation");
 
-  std::vector<f_t> pdlp_objective = cuopt::host_copy(problem.objective_coefficients);
+  auto stream                     = problem.handle_ptr->get_stream();
+  std::vector<f_t> pdlp_objective = cuopt::host_copy(problem.objective_coefficients, stream);
 
   dual_simplex::csr_matrix_t<i_t, f_t> csr_A(
     problem.n_constraints, problem.n_variables, problem.nnz);
-  csr_A.x         = cuopt::host_copy(problem.coefficients);
-  csr_A.j         = cuopt::host_copy(problem.variables);
-  csr_A.row_start = cuopt::host_copy(problem.offsets);
+  csr_A.x         = cuopt::host_copy(problem.coefficients, stream);
+  csr_A.j         = cuopt::host_copy(problem.variables, stream);
+  csr_A.row_start = cuopt::host_copy(problem.offsets, stream);
 
-  problem.handle_ptr->get_stream().synchronize();
+  stream.synchronize();
   CUOPT_LOG_DEBUG("Converting to compressed column");
   csr_A.to_compressed_col(lp.A);
   CUOPT_LOG_DEBUG("Converted to compressed column");
 
   std::vector<f_t> slack(problem.n_constraints);
-  std::vector<f_t> tmp_x = cuopt::host_copy(sol.get_primal_solution());
-  problem.handle_ptr->get_stream().synchronize();
+  std::vector<f_t> tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream);
+  stream.synchronize();
   dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack);
   CUOPT_LOG_DEBUG("Multiplied A and x");
 
@@ -161,8 +162,8 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,
 
   auto [lower, upper] = extract_host_bounds<f_t>(problem.variable_bounds, problem.handle_ptr);
 
-  std::vector<f_t> constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds);
-  std::vector<f_t> constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds);
+  std::vector<f_t> constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds, stream);
+  std::vector<f_t> constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds, stream);
 
   lp.objective.resize(n, 0.0);
   std::copy(
@@ -187,10 +188,10 @@ void translate_to_crossover_problem(const detail::problem_t<i_t, f_t>& problem,
     if (initial_solution.x[j] > lp.upper[j]) { initial_solution.x[j] = lp.upper[j]; }
   }
   CUOPT_LOG_DEBUG("Finished with x");
-  initial_solution.y = cuopt::host_copy(sol.get_dual_solution());
+  initial_solution.y = cuopt::host_copy(sol.get_dual_solution(), stream);
 
-  std::vector<f_t> tmp_z = cuopt::host_copy(sol.get_reduced_cost());
-  problem.handle_ptr->get_stream().synchronize();
+  std::vector<f_t> tmp_z = cuopt::host_copy(sol.get_reduced_cost(), stream);
+  stream.synchronize();
   std::copy(tmp_z.begin(), tmp_z.begin() + problem.n_variables, initial_solution.z.begin());
   for (i_t j = problem.n_variables; j < n; ++j) {
     initial_solution.z[j] = initial_solution.y[j - problem.n_variables];
diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu
index b7e3a5331..ba648f30e 100644
--- a/cpp/src/mip/diversity/lns/rins.cu
+++ b/cpp/src/mip/diversity/lns/rins.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights
  * reserved. SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -221,7 +221,7 @@ void rins_t<i_t, f_t>::run_rins()
     &rins_handle, &fixed_problem, context.settings, context.scaling);
   fj_t<i_t, f_t> fj(fj_context);
   solution_t<i_t, f_t> fj_solution(fixed_problem);
-  fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment));
+  fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream()));
   std::vector<f_t> default_weights(fixed_problem.n_constraints, 1.);
   cpu_fj_thread_t<i_t, f_t> cpu_fj_thread;
   cpu_fj_thread.fj_cpu             = fj.create_cpu_climber(fj_solution,
diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh
index 051d51483..94cc66399 100644
--- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh
+++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -190,7 +190,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
                                       probing_values,
                                       n_vars_from_other,
                                       variable_map);
-      probing_config.probing_values         = host_copy(probing_values);
+      probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream());
       probing_config.n_of_fixed_from_first  = fixed_from_guiding;
       probing_config.n_of_fixed_from_second = fixed_from_other;
       probing_config.use_balanced_probing   = true;
@@ -214,7 +214,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
       timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit);
       get_probing_values_for_infeasible(
         guiding_solution, other_solution, offspring, probing_values, n_vars_from_other);
-      probing_config.probing_values = host_copy(probing_values);
+      probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream());
       constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config);
     }
     constraint_prop.max_n_failed_repair_iterations = 1;
diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu
index a3353e72f..ce497adfa 100644
--- a/cpp/src/mip/local_search/local_search.cu
+++ b/cpp/src/mip/local_search/local_search.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -110,7 +110,8 @@ void local_search_t<i_t, f_t>::start_cpufj_lptopt_scratch_threads(
   std::vector<f_t> default_weights(context.problem_ptr->n_constraints, 1.);
 
   solution_t<i_t, f_t> solution_lp(*context.problem_ptr);
-  solution_lp.copy_new_assignment(host_copy(lp_optimal_solution));
+  solution_lp.copy_new_assignment(
+    host_copy(lp_optimal_solution, context.problem_ptr->handle_ptr->get_stream()));
   solution_lp.round_random_nearest(500);
   scratch_cpu_fj_on_lp_opt.fj_cpu = fj.create_cpu_climber(
     solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_);
diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu
index 0d8fb6a08..6d0fb767d 100644
--- a/cpp/src/mip/presolve/conditional_bound_strengthening.cu
+++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -230,11 +230,12 @@ void conditional_bound_strengthening_t<i_t, f_t>::select_constraint_pairs_host(
 #ifdef DEBUG_COND_BOUNDS_PROP
   auto start_time = std::chrono::high_resolution_clock::now();
 #endif
-  auto variables = cuopt::host_copy(problem.variables);
-  auto offsets   = cuopt::host_copy(problem.offsets);
+  auto stream    = problem.handle_ptr->get_stream();
+  auto variables = cuopt::host_copy(problem.variables, stream);
+  auto offsets   = cuopt::host_copy(problem.offsets, stream);
 
-  auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints);
-  auto reverse_offsets     = cuopt::host_copy(problem.reverse_offsets);
+  auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints, stream);
+  auto reverse_offsets     = cuopt::host_copy(problem.reverse_offsets, stream);
 
   std::vector<int2> constraint_pairs_h(max_pair_per_row * problem.n_constraints, {-1, -1});
   std::unordered_set<int> cnstr_pair;
@@ -295,8 +296,8 @@ void conditional_bound_strengthening_t<i_t, f_t>::select_constraint_pairs_device
                   colsC,
                   valsC);
   std::vector<int2> constraint_pairs_h;
-  offsets_h = cuopt::host_copy(offsetsC);
-  cols_h    = cuopt::host_copy(colsC);
+  offsets_h = cuopt::host_copy(offsetsC, stream);
+  cols_h    = cuopt::host_copy(colsC, stream);
 
   constraint_pairs_h.reserve(max_pair_per_row * problem.n_constraints);
   for (int i = 0; i < problem.n_constraints; ++i) {
@@ -654,8 +655,9 @@ void conditional_bound_strengthening_t<i_t, f_t>::solve(problem_t<i_t, f_t>& pro
     raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size;
 
 #ifdef DEBUG_COND_BOUNDS_PROP
-  auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds);
-  auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds);
+  auto debug_stream = problem.handle_ptr->get_stream();
+  auto old_lb_h     = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream);
+  auto old_ub_h     = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream);
 
   auto start_time = std::chrono::high_resolution_clock::now();
 #endif
@@ -674,8 +676,8 @@ void conditional_bound_strengthening_t<i_t, f_t>::solve(problem_t<i_t, f_t>& pro
   double time_for_presolve =
     std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
 
-  auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds);
-  auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds);
+  auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream);
+  auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream);
 
   int num_improvements = 0;
   int num_new_equality = 0;
diff --git a/cpp/src/mip/presolve/lb_probing_cache.cu b/cpp/src/mip/presolve/lb_probing_cache.cu
index 4a03a86fd..790ed32e4 100644
--- a/cpp/src/mip/presolve/lb_probing_cache.cu
+++ b/cpp/src/mip/presolve/lb_probing_cache.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -302,7 +302,7 @@ inline std::vector<i_t> compute_prioritized_integer_indices(
                         }
                         return false;
                       });
-  auto h_priority_indices = host_copy(priority_indices);
+  auto h_priority_indices = host_copy(priority_indices, problem.pb->handle_ptr->get_stream());
   return h_priority_indices;
 }
 
@@ -315,9 +315,10 @@ void compute_probing_cache(load_balanced_bounds_presolve_t<i_t, f_t>& bound_pres
   auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem);
   // std::cout<<"priority_indices\n";
   CUOPT_LOG_DEBUG("Computing probing cache");
-  auto h_integer_indices      = host_copy(problem.pb->integer_indices);
-  auto h_var_upper_bounds     = host_copy(problem.pb->variable_upper_bounds);
-  auto h_var_lower_bounds     = host_copy(problem.pb->variable_lower_bounds);
+  auto stream                 = problem.pb->handle_ptr->get_stream();
+  auto h_integer_indices      = host_copy(problem.pb->integer_indices, stream);
+  auto h_var_upper_bounds     = host_copy(problem.pb->variable_upper_bounds, stream);
+  auto h_var_lower_bounds     = host_copy(problem.pb->variable_lower_bounds, stream);
   size_t n_of_cached_probings = 0;
   // TODO adjust the iteration limit depending on the total time limit and time it takes for single
   // var
diff --git a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh
index 03ff6b2c0..0ace09afb 100644
--- a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh
+++ b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -181,7 +181,7 @@ class log_dist_t {
   log_dist_t() = default;
 
   log_dist_t(rmm::device_uvector<i_t>& vertex_id, rmm::device_uvector<i_t>& bin_offsets)
-    : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets))
+    : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets, bin_offsets.stream()))
   {
     // If bin_offsets_ is smaller than NumberBins<i_t> then resize it
     // so that the last element is repeated
diff --git a/cpp/src/mip/presolve/probing_cache.cu b/cpp/src/mip/presolve/probing_cache.cu
index 18620dc51..e191cdde9 100644
--- a/cpp/src/mip/presolve/probing_cache.cu
+++ b/cpp/src/mip/presolve/probing_cache.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -346,7 +346,7 @@ inline std::vector<i_t> compute_prioritized_integer_indices(
                         }
                         return false;
                       });
-  auto h_priority_indices = host_copy(priority_indices);
+  auto h_priority_indices = host_copy(priority_indices, problem.handle_ptr->get_stream());
   problem.handle_ptr->sync_stream();
   return h_priority_indices;
 }
@@ -461,8 +461,9 @@ void compute_probing_cache(bound_presolve_t<i_t, f_t>& bound_presolve,
   // we dont want to compute the probing cache for all variables for time and computation resources
   auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem);
   CUOPT_LOG_DEBUG("Computing probing cache");
-  auto h_integer_indices  = host_copy(problem.integer_indices);
-  const auto h_var_bounds = host_copy(problem.variable_bounds);
+  auto stream             = problem.handle_ptr->get_stream();
+  auto h_integer_indices  = host_copy(problem.integer_indices, stream);
+  const auto h_var_bounds = host_copy(problem.variable_bounds, stream);
   // TODO adjust the iteration limit depending on the total time limit and time it takes for single
   // var
   bound_presolve.settings.iteration_limit = 50;
diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh
index cf9659662..c2a3927ea 100644
--- a/cpp/src/mip/presolve/trivial_presolve.cuh
+++ b/cpp/src/mip/presolve/trivial_presolve.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -313,12 +313,13 @@ void update_from_csr(problem_t<i_t, f_t>& pb)
 template <typename i_t, typename f_t>
 void test_reverse_matches(const problem_t<i_t, f_t>& pb)
 {
-  auto h_offsets              = cuopt::host_copy(pb.offsets);
-  auto h_coefficients         = cuopt::host_copy(pb.coefficients);
-  auto h_variables            = cuopt::host_copy(pb.variables);
-  auto h_reverse_offsets      = cuopt::host_copy(pb.reverse_offsets);
-  auto h_reverse_constraints  = cuopt::host_copy(pb.reverse_constraints);
-  auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients);
+  auto stream                 = pb.handle_ptr->get_stream();
+  auto h_offsets              = cuopt::host_copy(pb.offsets, stream);
+  auto h_coefficients         = cuopt::host_copy(pb.coefficients, stream);
+  auto h_variables            = cuopt::host_copy(pb.variables, stream);
+  auto h_reverse_offsets      = cuopt::host_copy(pb.reverse_offsets, stream);
+  auto h_reverse_constraints  = cuopt::host_copy(pb.reverse_constraints, stream);
+  auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients, stream);
 
   std::vector<std::unordered_set<i_t>> vars_per_constr(pb.n_constraints);
   std::vector<std::unordered_set<f_t>> coeff_per_constr(pb.n_constraints);
diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu
index 5be50fb05..815ef5aa0 100644
--- a/cpp/src/mip/problem/problem.cu
+++ b/cpp/src/mip/problem/problem.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -1344,7 +1344,7 @@ problem_t<i_t, f_t> problem_t<i_t, f_t>::get_problem_after_fixing_vars(
   // do an assignment from the original_ids of the current problem
   problem.original_ids.resize(variable_map.size());
   std::fill(problem.reverse_original_ids.begin(), problem.reverse_original_ids.end(), -1);
-  auto h_variable_map = cuopt::host_copy(variable_map);
+  auto h_variable_map = cuopt::host_copy(variable_map, handle_ptr->get_stream());
   for (size_t i = 0; i < variable_map.size(); ++i) {
     cuopt_assert(h_variable_map[i] < original_ids.size(), "Variable index out of bounds");
     problem.original_ids[i] = original_ids[h_variable_map[i]];
@@ -1522,9 +1522,10 @@ std::vector<std::vector<std::pair<i_t, f_t>>> compute_var_to_constraint_map(
 {
   raft::common::nvtx::range fun_scope("compute_var_to_constraint_map");
   std::vector<std::vector<std::pair<i_t, f_t>>> variable_constraint_map(pb.n_variables);
-  auto h_variables    = cuopt::host_copy(pb.variables);
-  auto h_coefficients = cuopt::host_copy(pb.coefficients);
-  auto h_offsets      = cuopt::host_copy(pb.offsets);
+  auto stream         = pb.handle_ptr->get_stream();
+  auto h_variables    = cuopt::host_copy(pb.variables, stream);
+  auto h_coefficients = cuopt::host_copy(pb.coefficients, stream);
+  auto h_offsets      = cuopt::host_copy(pb.offsets, stream);
   for (i_t cnst = 0; cnst < pb.n_constraints; ++cnst) {
     for (i_t i = h_offsets[cnst]; i < h_offsets[cnst + 1]; ++i) {
       i_t var   = h_variables[i];
@@ -1542,10 +1543,11 @@ void standardize_bounds(std::vector<std::vector<std::pair<i_t, f_t>>>& variable_
 {
   raft::common::nvtx::range fun_scope("standardize_bounds");
   auto handle_ptr               = pb.handle_ptr;
-  auto h_var_bounds             = cuopt::host_copy(pb.variable_bounds);
-  auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients);
-  auto h_variable_types         = cuopt::host_copy(pb.variable_types);
-  auto h_var_flags              = cuopt::host_copy(pb.presolve_data.var_flags);
+  auto stream                   = handle_ptr->get_stream();
+  auto h_var_bounds             = cuopt::host_copy(pb.variable_bounds, stream);
+  auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients, stream);
+  auto h_variable_types         = cuopt::host_copy(pb.variable_types, stream);
+  auto h_var_flags              = cuopt::host_copy(pb.presolve_data.var_flags, stream);
   handle_ptr->sync_stream();
 
   const i_t n_vars_originally = (i_t)h_var_bounds.size();
@@ -1687,12 +1689,13 @@ void problem_t<i_t, f_t>::get_host_user_problem(
   i_t nz                 = nnz;
   user_problem.num_rows  = m;
   user_problem.num_cols  = n;
-  user_problem.objective = cuopt::host_copy(objective_coefficients);
+  auto stream            = handle_ptr->get_stream();
+  user_problem.objective = cuopt::host_copy(objective_coefficients, stream);
 
   dual_simplex::csr_matrix_t<i_t, f_t> csr_A(m, n, nz);
-  csr_A.x         = cuopt::host_copy(coefficients);
-  csr_A.j         = cuopt::host_copy(variables);
-  csr_A.row_start = cuopt::host_copy(offsets);
+  csr_A.x         = cuopt::host_copy(coefficients, stream);
+  csr_A.j         = cuopt::host_copy(variables, stream);
+  csr_A.row_start = cuopt::host_copy(offsets, stream);
 
   csr_A.to_compressed_col(user_problem.A);
 
@@ -1701,8 +1704,8 @@ void problem_t<i_t, f_t>::get_host_user_problem(
   user_problem.range_rows.clear();
   user_problem.range_value.clear();
 
-  auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds);
-  auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds);
+  auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds, stream);
+  auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds, stream);
 
   // All constraints have lower and upper bounds
   // lr <= a_i^T x <= ur
@@ -1763,7 +1766,7 @@ void problem_t<i_t, f_t>::get_host_user_problem(
   user_problem.obj_scale    = presolve_data.objective_scaling_factor;
   user_problem.var_types.resize(n);
 
-  auto model_variable_types = cuopt::host_copy(variable_types);
+  auto model_variable_types = cuopt::host_copy(variable_types, stream);
   for (int j = 0; j < n; ++j) {
     user_problem.var_types[j] =
       model_variable_types[j] == var_t::CONTINUOUS
@@ -1781,7 +1784,8 @@ template <typename i_t, typename f_t>
 void problem_t<i_t, f_t>::compute_vars_with_objective_coeffs()
 {
   raft::common::nvtx::range fun_scope("compute_vars_with_objective_coeffs");
-  auto h_objective_coefficients = cuopt::host_copy(objective_coefficients);
+  auto h_objective_coefficients =
+    cuopt::host_copy(objective_coefficients, handle_ptr->get_stream());
   std::vector<i_t> vars_with_objective_coeffs_;
   std::vector<f_t> objective_coeffs_;
   for (i_t i = 0; i < n_variables; ++i) {
diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu
index 36bef4142..9e9a2d75f 100644
--- a/cpp/src/mip/solution/solution.cu
+++ b/cpp/src/mip/solution/solution.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -229,11 +229,12 @@ void solution_t<i_t, f_t>::assign_random_within_bounds(f_t ratio_of_vars_to_rand
                                                        bool only_integers)
 {
   std::mt19937 rng(cuopt::seed_generator::get_seed());
-  std::vector<f_t> h_assignment = host_copy(assignment);
+  auto stream                   = handle_ptr->get_stream();
+  std::vector<f_t> h_assignment = host_copy(assignment, stream);
   std::uniform_real_distribution<f_t> unif_prob(0, 1);
 
-  auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds);
-  auto variable_types  = cuopt::host_copy(problem_ptr->variable_types);
+  auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds, stream);
+  auto variable_types  = cuopt::host_copy(problem_ptr->variable_types, stream);
   problem_ptr->handle_ptr->sync_stream();
   for (size_t i = 0; i < problem_ptr->variable_bounds.size(); ++i) {
     if (only_integers && variable_types[i] != var_t::INTEGER) { continue; }
diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu
index e5161882e..e6a392d40 100644
--- a/cpp/src/mip/solve.cu
+++ b/cpp/src/mip/solve.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -259,7 +259,8 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
                      std::numeric_limits<f_t>::signaling_NaN());
         detail::problem_t<i_t, f_t> full_problem(op_problem);
         detail::solution_t<i_t, f_t> full_sol(full_problem);
-        full_sol.copy_new_assignment(cuopt::host_copy(primal_solution));
+        full_sol.copy_new_assignment(
+          cuopt::host_copy(primal_solution, op_problem.get_handle_ptr()->get_stream()));
         full_sol.compute_feasibility();
         if (!full_sol.get_feasible()) {
           CUOPT_LOG_WARN("The solution is not feasible after post solve");
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index 0da4c6398..7311a26fd 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -136,7 +136,8 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     auto opt_sol = solve_lp_with_method<i_t, f_t>(*context.problem_ptr, settings, lp_timer);
 
     solution_t<i_t, f_t> sol(*context.problem_ptr);
-    sol.copy_new_assignment(host_copy(opt_sol.get_primal_solution()));
+    sol.copy_new_assignment(
+      host_copy(opt_sol.get_primal_solution(), context.problem_ptr->handle_ptr->get_stream()));
     if (opt_sol.get_termination_status() == pdlp_termination_status_t::Optimal ||
         opt_sol.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible ||
         opt_sol.get_termination_status() == pdlp_termination_status_t::DualInfeasible) {
diff --git a/cpp/src/routing/adapters/adapted_sol.cuh b/cpp/src/routing/adapters/adapted_sol.cuh
index 48ebbdf2f..463f876ff 100644
--- a/cpp/src/routing/adapters/adapted_sol.cuh
+++ b/cpp/src/routing/adapters/adapted_sol.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -449,8 +449,9 @@ struct adapted_sol_t {
   void populate_unserviced_nodes()
   {
     raft::common::nvtx::range fun_scope("populate_unserviced_nodes");
-    has_unserviced_nodes     = false;
-    auto h_route_id_per_node = host_copy(sol.route_node_map.route_id_per_node);
+    has_unserviced_nodes = false;
+    auto h_route_id_per_node =
+      host_copy(sol.route_node_map.route_id_per_node, sol.sol_handle->get_stream());
     for (size_t i = 0; i < h_route_id_per_node.size(); ++i) {
       if (h_route_id_per_node[i] == -1) {
         pred[i]            = NodeInfo<>();
@@ -487,12 +488,13 @@ struct adapted_sol_t {
       skip_route_copy = false;
     }
     std::vector<i_t> h_routes_to_copy;
-    if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy);
+    if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy, sol.sol_handle->get_stream());
     for (i_t i = 0; i < sol.n_routes && !skip_route_copy; ++i) {
       if (!copy_all && h_routes_to_copy[i] == 0) continue;
-      auto& curr_route     = sol.get_route(i);
-      auto node_infos_temp = host_copy(curr_route.dimensions.requests.node_info);
-      i_t n_nodes          = curr_route.n_nodes.value(sol.sol_handle->get_stream());
+      auto& curr_route = sol.get_route(i);
+      auto node_infos_temp =
+        host_copy(curr_route.dimensions.requests.node_info, sol.sol_handle->get_stream());
+      i_t n_nodes = curr_route.n_nodes.value(sol.sol_handle->get_stream());
 
       // Remove break nodes for diversity
       std::vector<NodeInfo<>> node_infos;
diff --git a/cpp/src/routing/adapters/assignment_adapter.cuh b/cpp/src/routing/adapters/assignment_adapter.cuh
index e7e945eb9..c41c3e161 100644
--- a/cpp/src/routing/adapters/assignment_adapter.cuh
+++ b/cpp/src/routing/adapters/assignment_adapter.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -84,18 +84,18 @@ assignment_t<i_t> ges_solver_t<i_t, f_t, REQUEST>::get_ges_assignment(
       auto route_id             = route.route_id.value(stream);
       auto vehicle_id           = route.vehicle_id.value(stream);
 
-      auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info);
+      auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info, stream);
       std::vector<double> departure_forward_h(node_infos_h.size(), 0.);
       std::vector<double> actual_arrival_h(node_infos_h.size(), 0.);
       std::vector<double> earliest_arrival_backward_h(node_infos_h.size(), 0.);
       std::vector<double> latest_arrival_forward_h(node_infos_h.size(), 0.);
       if (problem.dimensions_info.has_dimension(detail::dim_t::TIME)) {
-        departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward);
-        actual_arrival_h    = cuopt::host_copy(route.dimensions.time_dim.actual_arrival);
+        departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward, stream);
+        actual_arrival_h    = cuopt::host_copy(route.dimensions.time_dim.actual_arrival, stream);
         earliest_arrival_backward_h =
-          cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward);
+          cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward, stream);
         latest_arrival_forward_h =
-          cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward);
+          cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward, stream);
       }
 
       i_t drop_return_trip = sol.problem_ptr->drop_return_trip_h[vehicle_id];
diff --git a/cpp/src/routing/adapters/solution_adapter.cuh b/cpp/src/routing/adapters/solution_adapter.cuh
index ba292a688..5571f4b3b 100644
--- a/cpp/src/routing/adapters/solution_adapter.cuh
+++ b/cpp/src/routing/adapters/solution_adapter.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -25,10 +25,11 @@ void fill_routes_data(solution_t<i_t, f_t, REQUEST>& sol,
                       const problem_t<i_t, f_t>& problem)
 {
   const auto n_routes    = assignment.get_vehicle_count();
-  auto h_route           = cuopt::host_copy(assignment.get_route());
-  auto h_truck_ids       = cuopt::host_copy(assignment.get_truck_id());
-  auto h_route_locations = cuopt::host_copy(assignment.get_order_locations());
-  auto h_node_types      = cuopt::host_copy(assignment.get_node_types());
+  auto stream            = sol.sol_handle->get_stream();
+  auto h_route           = cuopt::host_copy(assignment.get_route(), stream);
+  auto h_truck_ids       = cuopt::host_copy(assignment.get_truck_id(), stream);
+  auto h_route_locations = cuopt::host_copy(assignment.get_order_locations(), stream);
+  auto h_node_types      = cuopt::host_copy(assignment.get_node_types(), stream);
 
   sol.sol_handle->sync_stream();
   assignment.get_truck_id().stream().synchronize();
diff --git a/cpp/src/routing/assignment.cu b/cpp/src/routing/assignment.cu
index 6bf468502..4636fa735 100644
--- a/cpp/src/routing/assignment.cu
+++ b/cpp/src/routing/assignment.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -248,13 +248,14 @@ void assignment_t<i_t>::print(std::ostream& os) const noexcept
 template <typename i_t>
 host_assignment_t<i_t>::host_assignment_t(const assignment_t<i_t>& routing_solution)
 {
-  route            = cuopt::host_copy(routing_solution.get_route());
-  truck_id         = cuopt::host_copy(routing_solution.get_truck_id());
-  stamp            = cuopt::host_copy(routing_solution.get_arrival_stamp());
-  locations        = cuopt::host_copy(routing_solution.get_order_locations());
-  node_types       = cuopt::host_copy(routing_solution.get_node_types());
-  unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes());
-  accepted         = cuopt::host_copy(routing_solution.get_accepted());
+  auto stream      = routing_solution.get_route().stream();
+  route            = cuopt::host_copy(routing_solution.get_route(), stream);
+  truck_id         = cuopt::host_copy(routing_solution.get_truck_id(), stream);
+  stamp            = cuopt::host_copy(routing_solution.get_arrival_stamp(), stream);
+  locations        = cuopt::host_copy(routing_solution.get_order_locations(), stream);
+  node_types       = cuopt::host_copy(routing_solution.get_node_types(), stream);
+  unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes(), stream);
+  accepted         = cuopt::host_copy(routing_solution.get_accepted(), stream);
 }
 
 template <typename i_t>
diff --git a/cpp/src/routing/crossovers/ox_graph.hpp b/cpp/src/routing/crossovers/ox_graph.hpp
index 6f65b0843..55d2e39a0 100644
--- a/cpp/src/routing/crossovers/ox_graph.hpp
+++ b/cpp/src/routing/crossovers/ox_graph.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -39,14 +39,14 @@ struct ox_graph_t {
     std::vector<int> buckets;
   };
 
-  host_t to_host()
+  host_t to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.row_sizes = host_copy(row_sizes);
-    h.route_ids = host_copy(route_ids);
-    h.indices   = host_copy(indices);
-    h.weights   = host_copy(weights);
-    h.buckets   = host_copy(buckets);
+    h.row_sizes = host_copy(row_sizes, stream);
+    h.route_ids = host_copy(route_ids, stream);
+    h.indices   = host_copy(indices, stream);
+    h.weights   = host_copy(weights, stream);
+    h.buckets   = host_copy(buckets, stream);
     return h;
   }
 
diff --git a/cpp/src/routing/crossovers/ox_recombiner.cuh b/cpp/src/routing/crossovers/ox_recombiner.cuh
index 17823c28b..681943d3c 100644
--- a/cpp/src/routing/crossovers/ox_recombiner.cuh
+++ b/cpp/src/routing/crossovers/ox_recombiner.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -433,7 +433,7 @@ struct OX {
       h_graph[i].reserve(problem_size);
     }
 
-    adj_to_host(h_graph);
+    adj_to_host(h_graph, A.sol.sol_handle->get_stream());
 
     std::vector<std::vector<double>> h_path_cost(problem_size + 1);
     // Vector of parents to recreate optimal path
@@ -500,7 +500,7 @@ struct OX {
     }
   }
 
-  void test_transpose_graph()
+  void test_transpose_graph(rmm::cuda_stream_view stream)
   {
     std::vector<std::vector<std::tuple<int, double, int>>> h_transpose_graph(offspring.size());
     for (size_t i = 0; i < h_transpose_graph.size(); ++i) {
@@ -512,7 +512,7 @@ struct OX {
       tmp_graph[i].reserve(problem_size);
     }
 
-    adj_to_host(tmp_graph);
+    adj_to_host(tmp_graph, stream);
 
     for (size_t i = 0; i < tmp_graph.size(); ++i) {
       for (size_t j = 0; j < tmp_graph[i].size(); ++j) {
@@ -521,7 +521,7 @@ struct OX {
       }
     }
 
-    auto tmp_transpose = transpose_graph.to_host();
+    auto tmp_transpose = transpose_graph.to_host(stream);
 
     for (size_t i = 0; i < h_transpose_graph.size(); ++i) {
       auto transpose_offset =
@@ -618,7 +618,7 @@ struct OX {
     raft::common::nvtx::range fun_scope("bellman_ford");
 
     compute_transpose_graph(A);
-    cuopt_func_call(test_transpose_graph());
+    cuopt_func_call(test_transpose_graph(A.sol.sol_handle->get_stream()));
 
     auto row_size = offspring.size();
     d_path_cost.resize((problem_size + 1) * row_size, A.sol.sol_handle->get_stream());
@@ -789,9 +789,10 @@ struct OX {
     offspring[0] = 0;
   }
 
-  void adj_to_host(std::vector<std::vector<std::tuple<int, double, int>>>& h_graph)
+  void adj_to_host(std::vector<std::vector<std::tuple<int, double, int>>>& h_graph,
+                   rmm::cuda_stream_view stream)
   {
-    auto tmp_graph = d_graph.to_host();
+    auto tmp_graph = d_graph.to_host(stream);
     for (int veh = 0; veh < n_buckets; ++veh) {
       for (size_t i = 0; i < d_graph.get_num_vertices(); ++i) {
         auto row_size      = tmp_graph.row_sizes[veh * d_graph.get_num_vertices() + i];
@@ -818,7 +819,7 @@ struct OX {
     for (size_t i = 0; i < h_graph.size(); ++i) {
       h_graph[i].reserve(max_route_len);
     }
-    adj_to_host(h_graph);
+    adj_to_host(h_graph, A.sol.sol_handle->get_stream());
 
     const auto& dimensions_info = A.problem->dimensions_info;
 
diff --git a/cpp/src/routing/cuda_graph.cuh b/cpp/src/routing/cuda_graph.cuh
index aa7d890d1..1fb2425d2 100644
--- a/cpp/src/routing/cuda_graph.cuh
+++ b/cpp/src/routing/cuda_graph.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -20,7 +20,9 @@ namespace detail {
 struct cuda_graph_t {
   void start_capture(rmm::cuda_stream_view stream)
   {
-    cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
+    // Use ThreadLocal mode to allow multi-threaded batch execution. In Global mode, potentially
+    // unsafe CUDA API calls (e.g. cudaMalloc) are prohibited in all threads during the capture.
+    cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
     capture_started = true;
   }
 
diff --git a/cpp/src/routing/fleet_info.hpp b/cpp/src/routing/fleet_info.hpp
index 929a26959..1a37c6655 100644
--- a/cpp/src/routing/fleet_info.hpp
+++ b/cpp/src/routing/fleet_info.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -70,32 +70,29 @@ class fleet_info_t {
     v_buckets_.resize(size, stream);
   }
 
-  auto to_host()
+  auto to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.break_offset            = host_copy(v_break_offset_);
-    h.break_durations         = host_copy(v_break_duration_);
-    h.break_earliest          = host_copy(v_break_earliest_);
-    h.break_latest            = host_copy(v_break_latest_);
-    h.earliest_time           = host_copy(v_earliest_time_);
-    h.latest_time             = host_copy(v_latest_time_);
-    h.start_locations         = host_copy(v_start_locations_);
-    h.return_locations        = host_copy(v_return_locations_);
-    h.drop_return_trip        = host_copy(v_drop_return_trip_);
-    h.skip_first_trip         = host_copy(v_skip_first_trip_);
-    h.capacities              = host_copy(v_capacities_);
-    h.max_costs               = host_copy(v_max_costs_);
-    h.max_times               = host_copy(v_max_times_);
-    h.fixed_costs             = host_copy(v_fixed_costs_);
-    h.fleet_order_constraints = fleet_order_constraints_.to_host();
-    h.types                   = host_copy(v_types_);
-    h.buckets                 = host_copy(v_buckets_);
+    h.break_offset            = host_copy(v_break_offset_, stream);
+    h.break_durations         = host_copy(v_break_duration_, stream);
+    h.break_earliest          = host_copy(v_break_earliest_, stream);
+    h.break_latest            = host_copy(v_break_latest_, stream);
+    h.earliest_time           = host_copy(v_earliest_time_, stream);
+    h.latest_time             = host_copy(v_latest_time_, stream);
+    h.start_locations         = host_copy(v_start_locations_, stream);
+    h.return_locations        = host_copy(v_return_locations_, stream);
+    h.drop_return_trip        = host_copy(v_drop_return_trip_, stream);
+    h.skip_first_trip         = host_copy(v_skip_first_trip_, stream);
+    h.capacities              = host_copy(v_capacities_, stream);
+    h.max_costs               = host_copy(v_max_costs_, stream);
+    h.max_times               = host_copy(v_max_times_, stream);
+    h.fixed_costs             = host_copy(v_fixed_costs_, stream);
+    h.fleet_order_constraints = fleet_order_constraints_.to_host(stream);
+    h.types                   = host_copy(v_types_, stream);
+    h.buckets                 = host_copy(v_buckets_, stream);
     h.matrices                = detail::create_host_mdarray<f_t>(
       matrices_.extent[2], matrices_.extent[0], matrices_.extent[1]);
-    raft::copy(h.matrices.buffer.data(),
-               matrices_.buffer.data(),
-               matrices_.buffer.size(),
-               matrices_.buffer.stream());
+    raft::copy(h.matrices.buffer.data(), matrices_.buffer.data(), matrices_.buffer.size(), stream);
     return h;
   }
 
diff --git a/cpp/src/routing/fleet_order_constraints.hpp b/cpp/src/routing/fleet_order_constraints.hpp
index fdd8f8fd8..c6be63a87 100644
--- a/cpp/src/routing/fleet_order_constraints.hpp
+++ b/cpp/src/routing/fleet_order_constraints.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -67,11 +67,11 @@ struct fleet_order_constraints_t {
     i_t n_vehicles;
   };
 
-  host_t to_host()
+  host_t to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.order_service_times = host_copy(order_service_times);
-    auto tmp_order_match  = host_copy(order_match);
+    h.order_service_times = host_copy(order_service_times, stream);
+    auto tmp_order_match  = host_copy(order_match, stream);
     h.order_match         = thrust::host_vector<bool>(tmp_order_match);
     h.n_orders            = n_orders;
     h.n_vehicles          = n_vehicles;
diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu
index 37e5905dd..194f73b99 100644
--- a/cpp/src/routing/ges_solver.cu
+++ b/cpp/src/routing/ges_solver.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -30,7 +30,10 @@ ges_solver_t<i_t, f_t, REQUEST>::ges_solver_t(const data_model_view_t<i_t, f_t>&
   : timer(time_limit_),
     problem(data_model, solver_settings),
     // override for now
-    pool_allocator(problem, max_sol_per_population, expected_route_count_),
+    pool_allocator(problem,
+                   max_sol_per_population,
+                   data_model.get_handle_ptr()->get_stream(),
+                   expected_route_count_),
     expected_route_count(expected_route_count_),
     intermediate_file(intermediate_file_)
 {
diff --git a/cpp/src/routing/local_search/cycle_finder/cycle.hpp b/cpp/src/routing/local_search/cycle_finder/cycle.hpp
index b38cb72c6..7e3e275e1 100644
--- a/cpp/src/routing/local_search/cycle_finder/cycle.hpp
+++ b/cpp/src/routing/local_search/cycle_finder/cycle.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -45,11 +45,11 @@ struct ret_cycles_t {
     i_t n_cycles;
   };
 
-  host_t to_host()
+  host_t to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.paths    = host_copy(paths);
-    h.offsets  = host_copy(offsets);
+    h.paths    = host_copy(paths, stream);
+    h.offsets  = host_copy(offsets, stream);
     h.n_cycles = size();
     return h;
   }
diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu
index a8a5d8aaf..65d654b06 100644
--- a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu
+++ b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -213,8 +213,9 @@ template <typename i_t, typename f_t, size_t max_routes>
 bool ExactCycleFinder<i_t, f_t, max_routes>::check_cycle(graph_t<i_t, f_t>& graph,
                                                          ret_cycles_t<i_t, f_t>& ret)
 {
-  auto h_graph      = graph.to_host();
-  auto h_cycles     = ret.to_host();
+  auto stream       = handle_ptr->get_stream();
+  auto h_graph      = graph.to_host(stream);
+  auto h_cycles     = ret.to_host(stream);
   bool cost_matches = true;
   std::unordered_set<i_t> changed_route_ids;
   for (i_t cycle = 0; cycle < h_cycles.n_cycles; ++cycle) {
diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp
index a08f5f1a3..3c28f78bc 100644
--- a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp
+++ b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -45,13 +45,13 @@ struct graph_t {
     std::vector<double> weights;
   };
 
-  host_t to_host()
+  host_t to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.row_sizes = host_copy(row_sizes);
-    h.route_ids = host_copy(route_ids);
-    h.indices   = host_copy(indices);
-    h.weights   = host_copy(weights);
+    h.row_sizes = host_copy(row_sizes, stream);
+    h.route_ids = host_copy(route_ids, stream);
+    h.indices   = host_copy(indices, stream);
+    h.weights   = host_copy(weights, stream);
     return h;
   }
 
diff --git a/cpp/src/routing/order_info.hpp b/cpp/src/routing/order_info.hpp
index 6f0fb1ecc..d20c46a8e 100644
--- a/cpp/src/routing/order_info.hpp
+++ b/cpp/src/routing/order_info.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -57,14 +57,14 @@ class order_info_t {
 
   bool is_pdp() const { return !v_pair_indices_.is_empty(); }
 
-  auto to_host()
+  auto to_host(rmm::cuda_stream_view stream)
   {
     host_t h;
-    h.earliest_time   = cuopt::host_copy(v_earliest_time_);
-    h.latest_time     = cuopt::host_copy(v_latest_time_);
-    h.demand          = cuopt::host_copy(v_demand_);
-    h.prizes          = cuopt::host_copy(v_prizes_);
-    h.order_locations = cuopt::host_copy(v_order_locations_);
+    h.earliest_time   = cuopt::host_copy(v_earliest_time_, stream);
+    h.latest_time     = cuopt::host_copy(v_latest_time_, stream);
+    h.demand          = cuopt::host_copy(v_demand_, stream);
+    h.prizes          = cuopt::host_copy(v_prizes_, stream);
+    h.order_locations = cuopt::host_copy(v_order_locations_, stream);
     h.depot_included  = depot_included_;
     return h;
   }
diff --git a/cpp/src/routing/problem/problem.cu b/cpp/src/routing/problem/problem.cu
index 1df7077fb..4335b9373 100644
--- a/cpp/src/routing/problem/problem.cu
+++ b/cpp/src/routing/problem/problem.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -55,7 +55,7 @@ problem_t<i_t, f_t>::problem_t(const data_model_view_t<i_t, f_t>& data_model_vie
              pair_indices_h.size(),
              handle_ptr->get_stream());
 
-  vehicle_types_h = cuopt::host_copy(fleet_info.v_types_);
+  vehicle_types_h = cuopt::host_copy(fleet_info.v_types_, handle_ptr->get_stream());
   for (auto& vtype : vehicle_types_h) {
     if (!distance_matrices_h.count(vtype)) {
       auto cost_matrix = fleet_info.matrices_.get_cost_matrix(vtype);
@@ -104,7 +104,7 @@ void problem_t<i_t, f_t>::populate_vehicle_buckets()
 {
   auto fleet_size = data_view_ptr->get_fleet_size();
   vehicle_buckets_h.resize(fleet_size);
-  fleet_info_h = fleet_info.to_host();
+  fleet_info_h = fleet_info.to_host(handle_ptr->get_stream());
 
   // infer vehicle types from data model
   for (int vehicle_id = 0; vehicle_id < fleet_size; ++vehicle_id) {
@@ -375,7 +375,7 @@ void problem_t<i_t, f_t>::populate_host_arrays()
   auto pickup_indices = data_view_ptr->get_pickup_delivery_pair().first;
   auto stream         = data_view_ptr->get_handle_ptr()->get_stream();
 
-  order_locations_h = cuopt::host_copy(order_info.v_order_locations_);
+  order_locations_h = cuopt::host_copy(order_info.v_order_locations_, stream);
   // Temporarily fill is_pickup_h for diversity, should use NodeInfo instead
   bool is_pdp = pickup_indices != nullptr;
   std::vector<i_t> h_pickup_indices(get_num_requests());
@@ -387,18 +387,20 @@ void problem_t<i_t, f_t>::populate_host_arrays()
     }
   }
 
-  drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_);
-  skip_first_trip_h  = cuopt::host_copy(fleet_info.v_skip_first_trip_);
-  order_info_h       = order_info.to_host();
+  drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_, stream);
+  skip_first_trip_h  = cuopt::host_copy(fleet_info.v_skip_first_trip_, stream);
+  order_info_h       = order_info.to_host(stream);
   handle_ptr->sync_stream();
 }
 
 template <typename i_t, typename f_t>
 void problem_t<i_t, f_t>::initialize_depot_info()
 {
-  int nvehicles                 = fleet_info.v_start_locations_.size();
-  auto vehicle_start_locations  = cuopt::host_copy(fleet_info.v_start_locations_);
-  auto vehicle_return_locations = cuopt::host_copy(fleet_info.v_return_locations_);
+  int nvehicles = fleet_info.v_start_locations_.size();
+  auto vehicle_start_locations =
+    cuopt::host_copy(fleet_info.v_start_locations_, handle_ptr->get_stream());
+  auto vehicle_return_locations =
+    cuopt::host_copy(fleet_info.v_return_locations_, handle_ptr->get_stream());
 
   start_depot_node_infos_h.resize(nvehicles);
   return_depot_node_infos_h.resize(nvehicles);
@@ -518,8 +520,8 @@ void problem_t<i_t, f_t>::populate_special_nodes()
 
   int n_vehicles = get_fleet_size();
 
-  auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_);
-  auto vehicle_latest_h   = cuopt::host_copy(fleet_info.v_latest_time_);
+  auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_, handle_ptr->get_stream());
+  auto vehicle_latest_h   = cuopt::host_copy(fleet_info.v_latest_time_, handle_ptr->get_stream());
   std::map<int, std::vector<int>> break_earliest_h, break_latest_h, break_duration_h;
   std::vector<int> break_offset_h(n_vehicles + 1, 0), break_nodes_offset_h;
 
diff --git a/cpp/src/routing/solution/pool_allocator.cuh b/cpp/src/routing/solution/pool_allocator.cuh
index 89049a698..d78df6951 100644
--- a/cpp/src/routing/solution/pool_allocator.cuh
+++ b/cpp/src/routing/solution/pool_allocator.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -42,8 +42,11 @@ class routing_resource_t {
 template <typename i_t, typename f_t, typename Solution, typename Problem>
 class pool_allocator_t {
  public:
-  pool_allocator_t(const Problem& problem_, i_t n_solutions_, i_t desired_n_routes = -1)
-    : stream_pool(n_solutions_), problem(problem_)
+  pool_allocator_t(const Problem& problem_,
+                   i_t n_solutions_,
+                   rmm::cuda_stream_view stream_,
+                   i_t desired_n_routes = -1)
+    : problem(problem_), stream(stream_)
   {
     raft::common::nvtx::range fun_scope("pool_allocator_t");
     // FIXME:: This is temporary, we should let the diversity manager decide this
@@ -54,8 +57,7 @@ class pool_allocator_t {
     }
     sol_handles.reserve(n_solutions_);
     for (i_t i = 0; i < n_solutions_; ++i) {
-      sol_handles.emplace_back(
-        std::make_unique<solution_handle_t<i_t, f_t>>(stream_pool.get_stream(i)));
+      sol_handles.emplace_back(std::make_unique<solution_handle_t<i_t, f_t>>(stream));
     }
     Solution dummy_sol{problem_, 0, sol_handles[0].get()};
     resource_pool =
@@ -68,22 +70,10 @@ class pool_allocator_t {
     }
   }
 
-  void sync_all_streams() const
-  {
-    for (size_t i = 0; i < stream_pool.get_pool_size(); ++i) {
-      stream_pool.get_stream(i).synchronize();
-    }
-  }
-
-  // a stream pool that will be used to execute different solutions on
-  // we are currently not using raft handles stream pool as it is constructed in python layer
-  // TODO: later consider using raft stream pool and construct it on python layer
-  // however that pushes some internal logic to the higher levels which we want to avoid
-  // rmm::cuda_stream_pool is non-movable as it contains an atomic variables
-  // KEEP THIS MEMBER ABOVE OTHER MEMBERS, so that it is destructed the last
-  rmm::cuda_stream_pool stream_pool;
+  void sync_all_streams() const { stream.synchronize(); }
 
   // problem description
+  rmm::cuda_stream_view stream;
   const Problem& problem;
   std::vector<std::unique_ptr<solution_handle_t<i_t, f_t>>> sol_handles;
   // keep a thread safe pool of local search and ges objects that can be reused
diff --git a/cpp/src/routing/solution/solution.cu b/cpp/src/routing/solution/solution.cu
index eeed02568..edd3bef9a 100644
--- a/cpp/src/routing/solution/solution.cu
+++ b/cpp/src/routing/solution/solution.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -735,7 +735,7 @@ std::vector<i_t> solution_t<i_t, f_t, REQUEST>::get_unserviced_nodes() const
   std::vector<i_t> unserviced_nodes;
   unserviced_nodes.reserve(get_num_orders());
   const bool depot_included = problem_ptr->order_info.depot_included_;
-  auto h_route_id_per_node  = host_copy(route_node_map.route_id_per_node);
+  auto h_route_id_per_node  = host_copy(route_node_map.route_id_per_node, sol_handle->get_stream());
   for (size_t i = 0; i < h_route_id_per_node.size(); ++i) {
     if (h_route_id_per_node[i] == -1) {
       if (i > 0 || !depot_included) { unserviced_nodes.push_back(i); }
diff --git a/cpp/src/routing/utilities/cython.cu b/cpp/src/routing/utilities/cython.cu
index 74b3776f4..d727ab5b7 100644
--- a/cpp/src/routing/utilities/cython.cu
+++ b/cpp/src/routing/utilities/cython.cu
@@ -1,16 +1,20 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
 
 #include <cuopt/routing/cython/cython.hpp>
 #include <cuopt/routing/solve.hpp>
+#include <raft/common/nvtx.hpp>
 #include <raft/core/handle.hpp>
 #include <rmm/device_buffer.hpp>
 #include <routing/generator/generator.hpp>
 
+#include <omp.h>
+#include <chrono>
+
 namespace cuopt {
 namespace cython {
 
@@ -86,6 +90,90 @@ std::unique_ptr<vehicle_routing_ret_t> call_solve(
   return std::make_unique<vehicle_routing_ret_t>(std::move(vr_ret));
 }
 
+/**
+ * @brief Wrapper for batch vehicle_routing to expose the API to cython
+ *
+ * The data models are solved concurrently: the loop iterations are distributed over at most
+ * min(data_models.size(), omp_get_max_threads()) OpenMP threads, and every data model gets
+ * its own non-blocking CUDA stream. For the duration of a solve the raft handle of the data
+ * model is switched to that stream; its original stream is restored before the iteration ends.
+ *
+ * @note NOTE(review): every iteration mutates the raft handle of its data model, so data models
+ * presumably must not share a handle - confirm against the callers.
+ *
+ * @param data_models Vector of data model pointers
+ * @param settings  Composable solver settings object
+ * @return std::vector<std::unique_ptr<vehicle_routing_ret_t>> One entry per data model, in input
+ * order; empty if data_models is empty
+ */
+std::vector<std::unique_ptr<vehicle_routing_ret_t>> call_batch_solve(
+  std::vector<routing::data_model_view_t<int, float>*> data_models,
+  routing::solver_settings_t<int, float>* settings)
+{
+  const std::size_t size = data_models.size();
+  std::vector<std::unique_ptr<vehicle_routing_ret_t>> list(size);
+
+  // Nothing to solve: return early, because rmm::cuda_stream_pool rejects a pool size of zero
+  // and the num_threads() clause of an OpenMP parallel region must be a positive integer.
+  if (size == 0) { return list; }
+
+  // Use OpenMP for parallel execution. The minimum is taken in std::size_t so that a very large
+  // batch cannot overflow the narrowing cast to int.
+  const int max_thread =
+    static_cast<int>(std::min(size, static_cast<std::size_t>(omp_get_max_threads())));
+  // One dedicated stream per data model, so concurrent solves never share a stream
+  rmm::cuda_stream_pool stream_pool(size, rmm::cuda_stream::flags::non_blocking);
+
+  // NOTE(review): an exception escaping the loop body below terminates the process, since OpenMP
+  // does not propagate exceptions out of a parallel region. This presumably relies on solve()
+  // reporting failures through the returned status - confirm.
+#pragma omp parallel for num_threads(max_thread)
+  for (std::size_t i = 0; i < size; ++i) {
+    const auto handle_ptr = data_models[i]->get_handle_ptr();
+    const auto old_stream = handle_ptr->get_stream();
+    const auto new_stream = stream_pool.get_stream(i);
+
+    // Make sure previous operations are finished
+    handle_ptr->sync_stream();
+
+    // Set new non blocking stream for current data model
+    raft::resource::set_cuda_stream(*handle_ptr, new_stream);
+    auto routing_solution = cuopt::routing::solve(*data_models[i], *settings);
+
+    // Make sure current solve is finished
+    new_stream.synchronize();
+
+    // Re-associate the result buffers with the original stream so that they are not tied to a
+    // pool stream, which is destroyed together with stream_pool when this function returns
+    auto make_buffer = [old_stream](rmm::device_buffer&& buf) {
+      buf.set_stream(old_stream);
+      return std::make_unique<rmm::device_buffer>(std::move(buf));
+    };
+
+    vehicle_routing_ret_t vr_ret{routing_solution.get_vehicle_count(),
+                                 routing_solution.get_total_objective(),
+                                 routing_solution.get_objectives(),
+                                 make_buffer(routing_solution.get_route().release()),
+                                 make_buffer(routing_solution.get_order_locations().release()),
+                                 make_buffer(routing_solution.get_arrival_stamp().release()),
+                                 make_buffer(routing_solution.get_truck_id().release()),
+                                 make_buffer(routing_solution.get_node_types().release()),
+                                 make_buffer(routing_solution.get_unserviced_nodes().release()),
+                                 make_buffer(routing_solution.get_accepted().release()),
+                                 routing_solution.get_status(),
+                                 routing_solution.get_status_string(),
+                                 routing_solution.get_error_status().get_error_type(),
+                                 routing_solution.get_error_status().what()};
+    list[i] = std::make_unique<vehicle_routing_ret_t>(std::move(vr_ret));
+
+    // Restore the old stream
+    raft::resource::set_cuda_stream(*handle_ptr, old_stream);
+    old_stream.synchronize();
+  }
+
+  return list;
+}
+
 /**
  * @brief Wrapper for dataset_t to expose the API to cython.
  * @param solver Composable solver object
diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp
index 24158816a..2b1890728 100644
--- a/cpp/src/utilities/copy_helpers.hpp
+++ b/cpp/src/utilities/copy_helpers.hpp
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -150,36 +150,6 @@ inline auto host_copy(bool const* device_ptr, size_t size, rmm::cuda_stream_view
   return h_bool_vec;
 }
 
-/**
- * @brief Simple utility function to copy device_uvector to host
- *
- * @tparam T
- * @param device_vec
- * @param stream_view
- * @return auto
- */
-template <typename T>
-auto host_copy(rmm::device_uvector<T> const& device_vec)
-{
-  return host_copy(device_vec.data(), device_vec.size(), device_vec.stream());
-}
-
-/**
- * @brief Simple utility function to copy device_uvector to host
- *
- * @tparam T
- * @param device_vec
- * @return auto
- */
-template <typename T, typename Allocator>
-auto host_copy(rmm::device_uvector<T> const& device_vec)
-{
-  std::vector<T, Allocator> host_vec(device_vec.size());
-  raft::copy(host_vec.data(), device_vec.data(), device_vec.size(), device_vec.stream());
-  device_vec.stream().synchronize();
-  return host_vec;
-}
-
 /**
  * @brief Simple utility function to copy device_uvector to host
  *
@@ -369,8 +339,9 @@ template <typename f_t, typename f_t2>
 std::tuple<std::vector<f_t>, std::vector<f_t>> extract_host_bounds(
   const rmm::device_uvector<f_t2>& variable_bounds, const raft::handle_t* handle_ptr)
 {
-  rmm::device_uvector<f_t> var_lb(variable_bounds.size(), handle_ptr->get_stream());
-  rmm::device_uvector<f_t> var_ub(variable_bounds.size(), handle_ptr->get_stream());
+  auto stream = handle_ptr->get_stream();
+  rmm::device_uvector<f_t> var_lb(variable_bounds.size(), stream);
+  rmm::device_uvector<f_t> var_ub(variable_bounds.size(), stream);
   thrust::transform(
     handle_ptr->get_thrust_policy(),
     variable_bounds.begin(),
@@ -378,8 +349,8 @@ std::tuple<std::vector<f_t>, std::vector<f_t>> extract_host_bounds(
     thrust::make_zip_iterator(thrust::make_tuple(var_lb.begin(), var_ub.begin())),
     [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); });
   handle_ptr->sync_stream();
-  auto h_var_lb = cuopt::host_copy(var_lb);
-  auto h_var_ub = cuopt::host_copy(var_ub);
+  auto h_var_lb = cuopt::host_copy(var_lb, stream);
+  auto h_var_ub = cuopt::host_copy(var_ub, stream);
   return std::make_tuple(h_var_lb, h_var_ub);
 }
 
diff --git a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu
index 18d49e3b8..9f73c5035 100644
--- a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu
+++ b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -127,16 +127,17 @@ TEST(SolverSettingsTest, warm_start_smaller_vector)
                                         -1);
   solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping);
 
+  auto stream = handle_.get_stream();
   std::vector<double> h_current_primal_solution =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream);
   std::vector<double> h_initial_primal_average =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream);
   std::vector<double> h_current_ATY =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream);
   std::vector<double> h_sum_primal_solutions =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream);
   std::vector<double> h_last_restart_duality_gap_primal_solution = cuopt::host_copy(
-    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_);
+    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream);
 
   EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size());
   EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size());
@@ -151,13 +152,13 @@ TEST(SolverSettingsTest, warm_start_smaller_vector)
   EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected);
 
   std::vector<double> h_current_dual_solution =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream);
   std::vector<double> h_initial_dual_average =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream);
   std::vector<double> h_sum_dual_solutions =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream);
   std::vector<double> h_last_restart_duality_gap_dual_solution = cuopt::host_copy(
-    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_);
+    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream);
 
   EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size());
   EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size());
@@ -227,16 +228,17 @@ TEST(SolverSettingsTest, warm_start_bigger_vector)
                                         -1);
   solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping);
 
+  auto stream = handle_.get_stream();
   std::vector<double> h_current_primal_solution =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream);
   std::vector<double> h_initial_primal_average =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream);
   std::vector<double> h_current_ATY =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream);
   std::vector<double> h_sum_primal_solutions =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream);
   std::vector<double> h_last_restart_duality_gap_primal_solution = cuopt::host_copy(
-    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_);
+    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream);
 
   EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size());
   EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size());
@@ -251,13 +253,13 @@ TEST(SolverSettingsTest, warm_start_bigger_vector)
   EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected);
 
   std::vector<double> h_current_dual_solution =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream);
   std::vector<double> h_initial_dual_average =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream);
   std::vector<double> h_sum_dual_solutions =
-    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_);
+    cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream);
   std::vector<double> h_last_restart_duality_gap_dual_solution = cuopt::host_copy(
-    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_);
+    solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream);
 
   EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size());
   EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size());
diff --git a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh
index de3e82fdb..11dbdba4b 100644
--- a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh
+++ b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -38,7 +38,7 @@ static void test_objective_sanity(
   double objective_value,
   double epsilon = tolerance)
 {
-  const auto primal_vars = host_copy(primal_solution);
+  const auto primal_vars = host_copy(primal_solution, primal_solution.stream());
   const auto& c_vector   = op_problem.get_objective_coefficients();
   std::vector<double> out(primal_vars.size());
   std::transform(primal_vars.cbegin(),
@@ -62,7 +62,8 @@ static void test_constraint_sanity(
   double epsilon        = tolerance,
   bool presolve_enabled = false)
 {
-  const std::vector<double> primal_vars              = host_copy(solution.get_primal_solution());
+  const std::vector<double> primal_vars =
+    host_copy(solution.get_primal_solution(), solution.get_primal_solution().stream());
   const std::vector<double>& values                  = op_problem.get_constraint_matrix_values();
   const std::vector<int>& indices                    = op_problem.get_constraint_matrix_indices();
   const std::vector<int>& offsets                    = op_problem.get_constraint_matrix_offsets();
diff --git a/cpp/tests/mip/elim_var_remap_test.cu b/cpp/tests/mip/elim_var_remap_test.cu
index 2b2f3f576..e1d66ac21 100644
--- a/cpp/tests/mip/elim_var_remap_test.cu
+++ b/cpp/tests/mip/elim_var_remap_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -120,8 +120,8 @@ void test_elim_var_remap(std::string test_instance)
 
   sub_problem.post_process_solution(sol);
 
-  auto golden_full_assignment       = host_copy(full_assignment);
-  auto fixed_sub_problem_assignment = host_copy(sol.assignment);
+  auto golden_full_assignment       = host_copy(full_assignment, handle_.get_stream());
+  auto fixed_sub_problem_assignment = host_copy(sol.assignment, handle_.get_stream());
 
   EXPECT_EQ(op_problem.get_n_variables(), fixed_sub_problem_assignment.size());
 
diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu
index 20f359fcb..019585d90 100644
--- a/cpp/tests/mip/load_balancing_test.cu
+++ b/cpp/tests/mip/load_balancing_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -49,8 +49,9 @@ std::tuple<std::vector<int>, std::vector<double>, std::vector<double>> select_k_
   auto seed = std::random_device{}();
   std::cerr << "Tested with seed " << seed << "\n";
   problem.compute_n_integer_vars();
-  auto v_bnd      = host_copy(problem.variable_bounds);
-  auto int_var_id = host_copy(problem.integer_indices);
+  auto stream     = problem.handle_ptr->get_stream();
+  auto v_bnd      = host_copy(problem.variable_bounds, stream);
+  auto int_var_id = host_copy(problem.integer_indices, stream);
   int_var_id.erase(
     std::remove_if(
       int_var_id.begin(),
@@ -106,10 +107,11 @@ bounds_probe_results(detail::bound_presolve_t<int, double>& bnd_prb_0,
   bnd_prb_1.solve(problem, probe_second);
   bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1));
 
-  auto h_lb_0 = host_copy(b_lb_0);
-  auto h_ub_0 = host_copy(b_ub_0);
-  auto h_lb_1 = host_copy(b_lb_1);
-  auto h_ub_1 = host_copy(b_ub_1);
+  auto stream = problem.handle_ptr->get_stream();
+  auto h_lb_0 = host_copy(b_lb_0, stream);
+  auto h_ub_0 = host_copy(b_ub_0, stream);
+  auto h_lb_1 = host_copy(b_lb_1, stream);
+  auto h_ub_1 = host_copy(b_ub_1, stream);
   return std::make_tuple(
     std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1));
 }
@@ -151,12 +153,13 @@ void test_multi_probe(std::string path)
     rmm::device_uvector<double> b_ub(problem.n_variables, problem.handle_ptr->get_stream());
     bnd_prb.set_updated_bounds(problem.handle_ptr, make_span(b_lb), make_span(b_ub));
 
-    auto h_lb = host_copy(b_lb);
-    auto h_ub = host_copy(b_ub);
+    auto stream = problem.handle_ptr->get_stream();
+    auto h_lb   = host_copy(b_lb, stream);
+    auto h_ub   = host_copy(b_ub, stream);
 
     lb_prs.solve(probe_first);
 
-    auto bnds = host_copy(lb_prs.vars_bnd);
+    auto bnds = host_copy(lb_prs.vars_bnd, stream);
     for (int i = 0; i < (int)h_lb.size(); ++i) {
       EXPECT_DOUBLE_EQ(bnds[2 * i], h_lb[i]);
       EXPECT_DOUBLE_EQ(bnds[2 * i + 1], h_ub[i]);
diff --git a/cpp/tests/mip/mip_utils.cuh b/cpp/tests/mip/mip_utils.cuh
index 7d5683998..19c44b2fd 100644
--- a/cpp/tests/mip/mip_utils.cuh
+++ b/cpp/tests/mip/mip_utils.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -21,7 +21,7 @@ static void test_variable_bounds(
 {
   const double* lower_bound_ptr = problem.get_variable_lower_bounds().data();
   const double* upper_bound_ptr = problem.get_variable_upper_bounds().data();
-  auto host_assignment          = cuopt::host_copy(solution);
+  auto host_assignment          = cuopt::host_copy(solution, solution.stream());
   double* assignment_ptr        = host_assignment.data();
   cuopt_assert(host_assignment.size() == problem.get_variable_lower_bounds().size(), "");
   cuopt_assert(host_assignment.size() == problem.get_variable_upper_bounds().size(), "");
@@ -81,7 +81,7 @@ static void test_constraint_sanity_per_row(
   const std::vector<double>& variable_upper_bounds   = op_problem.get_variable_upper_bounds();
   std::vector<double> residual(constraint_lower_bounds.size(), 0.0);
   std::vector<double> viol(constraint_lower_bounds.size(), 0.0);
-  auto h_solution = cuopt::host_copy(solution);
+  auto h_solution = cuopt::host_copy(solution, solution.stream());
   // CSR SpMV
   for (size_t i = 0; i < offsets.size() - 1; ++i) {
     for (int j = offsets[i]; j < offsets[i + 1]; ++j) {
diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu
index cb960425f..9a933c054 100644
--- a/cpp/tests/mip/multi_probe_test.cu
+++ b/cpp/tests/mip/multi_probe_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -49,7 +49,7 @@ std::tuple<std::vector<int>, std::vector<double>, std::vector<double>> select_k_
   std::cerr << "Tested with seed " << seed << "\n";
   problem.compute_n_integer_vars();
   auto [v_lb, v_ub] = extract_host_bounds<double>(problem.variable_bounds, problem.handle_ptr);
-  auto int_var_id   = host_copy(problem.integer_indices);
+  auto int_var_id   = host_copy(problem.integer_indices, problem.handle_ptr->get_stream());
   int_var_id.erase(
     std::remove_if(int_var_id.begin(),
                    int_var_id.end(),
@@ -106,10 +106,11 @@ bounds_probe_results(detail::bound_presolve_t<int, double>& bnd_prb_0,
   bnd_prb_1.solve(problem, probe_second);
   bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1));
 
-  auto h_lb_0 = host_copy(b_lb_0);
-  auto h_ub_0 = host_copy(b_ub_0);
-  auto h_lb_1 = host_copy(b_lb_1);
-  auto h_ub_1 = host_copy(b_ub_1);
+  auto stream = problem.handle_ptr->get_stream();
+  auto h_lb_0 = host_copy(b_lb_0, stream);
+  auto h_ub_0 = host_copy(b_ub_0, stream);
+  auto h_lb_1 = host_copy(b_lb_1, stream);
+  auto h_ub_1 = host_copy(b_ub_1, stream);
   return std::make_tuple(
     std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1));
 }
@@ -121,17 +122,18 @@ multi_probe_results(
   const std::tuple<std::vector<int>, std::vector<double>, std::vector<double>>& probe_tuple)
 {
   prb.solve(problem, probe_tuple);
-  rmm::device_uvector<double> m_lb_0(problem.n_variables, problem.handle_ptr->get_stream());
-  rmm::device_uvector<double> m_ub_0(problem.n_variables, problem.handle_ptr->get_stream());
-  rmm::device_uvector<double> m_lb_1(problem.n_variables, problem.handle_ptr->get_stream());
-  rmm::device_uvector<double> m_ub_1(problem.n_variables, problem.handle_ptr->get_stream());
+  auto stream = problem.handle_ptr->get_stream();
+  rmm::device_uvector<double> m_lb_0(problem.n_variables, stream);
+  rmm::device_uvector<double> m_ub_0(problem.n_variables, stream);
+  rmm::device_uvector<double> m_lb_1(problem.n_variables, stream);
+  rmm::device_uvector<double> m_ub_1(problem.n_variables, stream);
   prb.set_updated_bounds(problem.handle_ptr, make_span(m_lb_0), make_span(m_ub_0), 0);
   prb.set_updated_bounds(problem.handle_ptr, make_span(m_lb_1), make_span(m_ub_1), 1);
 
-  auto h_lb_0 = host_copy(m_lb_0);
-  auto h_ub_0 = host_copy(m_ub_0);
-  auto h_lb_1 = host_copy(m_lb_1);
-  auto h_ub_1 = host_copy(m_ub_1);
+  auto h_lb_0 = host_copy(m_lb_0, stream);
+  auto h_ub_0 = host_copy(m_ub_0, stream);
+  auto h_lb_1 = host_copy(m_lb_1, stream);
+  auto h_ub_1 = host_copy(m_ub_1, stream);
   return std::make_tuple(
     std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1));
 }
@@ -170,15 +172,16 @@ void test_multi_probe(std::string path)
   auto [m_lb_0, m_ub_0, m_lb_1, m_ub_1] =
     multi_probe_results(multi_probe_prs, problem, probe_tuple);
 
-  auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity);
-  auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity);
-  auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity);
-  auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity);
+  auto stream        = problem.handle_ptr->get_stream();
+  auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity, stream);
+  auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity, stream);
+  auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity, stream);
+  auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity, stream);
 
-  auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity);
-  auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity);
-  auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity);
-  auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity);
+  auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity, stream);
+  auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity, stream);
+  auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity, stream);
+  auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity, stream);
 
   for (int i = 0; i < (int)bnd_min_act_0.size(); ++i) {
     EXPECT_DOUBLE_EQ(bnd_min_act_0[i], mlp_min_act_0[i]);
diff --git a/cpp/tests/mip/presolve_test.cu b/cpp/tests/mip/presolve_test.cu
index d27dd1db9..893602e20 100644
--- a/cpp/tests/mip/presolve_test.cu
+++ b/cpp/tests/mip/presolve_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -44,7 +44,7 @@ TEST(problem, find_implied_integers)
   auto problem = detail::problem_t<int, double>(result->reduced_problem);
   problem.set_implied_integers(result->implied_integer_indices);
   ASSERT_TRUE(result->implied_integer_indices.size() > 0);
-  auto var_types = host_copy(problem.variable_types);
+  auto var_types = host_copy(problem.variable_types, handle_.get_stream());
   // Find the index of the one continuous variable
   auto it = std::find_if(var_types.begin(), var_types.end(), [](var_t var_type) {
     return var_type == var_t::CONTINUOUS;
diff --git a/cpp/tests/mip/problem_test.cu b/cpp/tests/mip/problem_test.cu
index 7113e265b..f884fda10 100644
--- a/cpp/tests/mip/problem_test.cu
+++ b/cpp/tests/mip/problem_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -234,8 +234,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds)
     fill_problem(op_problem);
     cuopt::linear_programming::detail::problem_t<int, double> problem(op_problem);
 
-    const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds);
-    const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds);
+    const auto constraints_lower_bounds =
+      host_copy(problem.constraint_lower_bounds, handle.get_stream());
+    const auto constraints_upper_bounds =
+      host_copy(problem.constraint_upper_bounds, handle.get_stream());
 
     EXPECT_EQ(constraints_lower_bounds[0], 1.0);
     EXPECT_EQ(constraints_upper_bounds[0], 1.0);
@@ -252,8 +254,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds)
     op_problem.set_constraint_upper_bounds(upper, 1);
     cuopt::linear_programming::detail::problem_t<int, double> problem(op_problem);
 
-    const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds);
-    const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds);
+    const auto constraints_lower_bounds =
+      host_copy(problem.constraint_lower_bounds, handle.get_stream());
+    const auto constraints_upper_bounds =
+      host_copy(problem.constraint_upper_bounds, handle.get_stream());
     EXPECT_EQ(constraints_lower_bounds[0], 2.0);
     EXPECT_EQ(constraints_upper_bounds[0], 3.0);
   }
@@ -270,8 +274,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds)
     fill_problem(op_problem);
     cuopt::linear_programming::detail::problem_t<int, double> problem(op_problem);
 
-    const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds);
-    const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds);
+    const auto constraints_lower_bounds =
+      host_copy(problem.constraint_lower_bounds, handle.get_stream());
+    const auto constraints_upper_bounds =
+      host_copy(problem.constraint_upper_bounds, handle.get_stream());
     EXPECT_EQ(constraints_lower_bounds[0], 2.0);
     EXPECT_EQ(constraints_upper_bounds[0], 3.0);
   }
diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu
index eb7e4bb3b..f9d76611d 100644
--- a/cpp/tests/mip/unit_test.cu
+++ b/cpp/tests/mip/unit_test.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -180,7 +180,7 @@ TEST(LPTest, TestSampleLP2)
   ASSERT_EQ(result.get_primal_solution().size(), 1);
 
   // Copy solution to host to access values
-  auto primal_host = cuopt::host_copy(result.get_primal_solution());
+  auto primal_host = cuopt::host_copy(result.get_primal_solution(), handle.get_stream());
   EXPECT_NEAR(primal_host[0], 0.0, 1e-6);
 
   EXPECT_NEAR(result.get_additional_termination_information().primal_objective, 0.0, 1e-6);
diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu
index b18150899..e27c16624 100644
--- a/cpp/tests/qp/unit_tests/two_variable_test.cu
+++ b/cpp/tests/qp/unit_tests/two_variable_test.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights
  * reserved. SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -65,7 +65,7 @@ TEST(two_variable_test, simple_test)
             cuopt::linear_programming::pdlp_termination_status_t::Optimal);
   EXPECT_NEAR(solution.get_objective_value(), -32.0, 1e-6);
 
-  auto sol_vec = cuopt::host_copy(solution.get_primal_solution());
+  auto sol_vec = cuopt::host_copy(solution.get_primal_solution(), handle.get_stream());
   EXPECT_NEAR(sol_vec[0], 4.0, 1e-6);
   EXPECT_NEAR(sol_vec[1], 2.0, 1e-6);
 }
diff --git a/cpp/tests/routing/CMakeLists.txt b/cpp/tests/routing/CMakeLists.txt
index 0ee757c3e..99cfdb9de 100644
--- a/cpp/tests/routing/CMakeLists.txt
+++ b/cpp/tests/routing/CMakeLists.txt
@@ -1,5 +1,5 @@
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 
@@ -32,4 +32,5 @@ ConfigureTest(ROUTING_UNIT_TEST
       ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/prize_collection.cu
       ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/objective_function.cu
       ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/top_k.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/batch_tsp.cu
 )
diff --git a/cpp/tests/routing/routing_test.cuh b/cpp/tests/routing/routing_test.cuh
index 31ad78332..cdafbbf1f 100644
--- a/cpp/tests/routing/routing_test.cuh
+++ b/cpp/tests/routing/routing_test.cuh
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -482,7 +482,7 @@ class base_test_t {
                vehicle_max_times_d.data(),
                vehicle_max_times_d.size(),
                stream_view_);
-    fleet_order_constraints_h = fleet_order_constraints_d.to_host();
+    fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_);
   }
 
   void check_time_windows(host_assignment_t<i_t> const& routing_solution, bool is_soft_tw = false)
@@ -492,7 +492,7 @@ class base_test_t {
     auto truck_id             = routing_solution.truck_id;
     auto locations            = routing_solution.locations;
     auto node_types           = routing_solution.node_types;
-    fleet_order_constraints_h = fleet_order_constraints_d.to_host();
+    fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_);
 
     std::vector<i_t> temp_truck_ids(truck_id);
     auto end_it = std::unique(temp_truck_ids.begin(), temp_truck_ids.end());
diff --git a/cpp/tests/routing/unit_tests/batch_tsp.cu b/cpp/tests/routing/unit_tests/batch_tsp.cu
new file mode 100644
index 000000000..01ae36e70
--- /dev/null
+++ b/cpp/tests/routing/unit_tests/batch_tsp.cu
@@ -0,0 +1,89 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <cuopt/routing/cython/cython.hpp>
+#include <cuopt/routing/solve.hpp>
+#include <utilities/copy_helpers.hpp>
+
+#include <raft/core/handle.hpp>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace cuopt {
+namespace routing {
+namespace test {
+
+using i_t = int;
+using f_t = float;
+
+/**
+ * @brief Builds a symmetric TSP cost matrix for locations placed on a line
+ * @param n_locations Number of locations
+ * @return Row-major flattened matrix whose entry (i, j) equals |i - j|
+ */
+std::vector<f_t> create_small_tsp_cost_matrix(i_t n_locations)
+{
+  std::vector<f_t> costs(n_locations * n_locations, 0.0f);
+
+  // Location k sits at coordinate k, so the cost between two stops is their index distance
+  for (i_t row = 0; row < n_locations; ++row) {
+    for (i_t col = 0; col < n_locations; ++col) {
+      costs[row * n_locations + col] = static_cast<f_t>(std::abs(row - col));
+    }
+  }
+  return costs;
+}
+
+/**
+ * @brief Batch-solves TSPs of varying sizes through the call_batch_solve API
+ */
+TEST(batch_tsp, varying_sizes)
+{
+  const std::vector<i_t> tsp_sizes = {5, 8, 10, 6, 7, 9};
+  const std::size_t n_problems     = tsp_sizes.size();
+
+  // Owners of each problem's handle, device cost matrix and data model
+  std::vector<std::unique_ptr<raft::handle_t>> handles;
+  std::vector<rmm::device_uvector<f_t>> cost_matrices_d;
+  std::vector<std::unique_ptr<cuopt::routing::data_model_view_t<i_t, f_t>>> data_models;
+  std::vector<cuopt::routing::data_model_view_t<i_t, f_t>*> data_model_ptrs;
+
+  for (std::size_t i = 0; i < n_problems; ++i) {
+    handles.push_back(std::make_unique<raft::handle_t>());
+    auto& handle = *handles.back();
+
+    auto cost_matrix_h = create_small_tsp_cost_matrix(tsp_sizes[i]);
+    cost_matrices_d.push_back(cuopt::device_copy(cost_matrix_h, handle.get_stream()));
+    data_models.push_back(std::make_unique<cuopt::routing::data_model_view_t<i_t, f_t>>(
+      &handle, tsp_sizes[i], 1, tsp_sizes[i]));
+    data_models.back()->add_cost_matrix(cost_matrices_d.back().data());
+    data_model_ptrs.push_back(data_models.back().get());
+  }
+
+  // Configure solver settings
+  cuopt::routing::solver_settings_t<i_t, f_t> settings;
+  settings.set_time_limit(5);
+
+  // Call batch solve
+  auto solutions = cuopt::cython::call_batch_solve(data_model_ptrs, &settings);
+
+  // Verify all solutions; sizes are compared as unsigned values to avoid a sign mismatch
+  ASSERT_EQ(solutions.size(), n_problems);
+  for (std::size_t i = 0; i < n_problems; ++i) {
+    ASSERT_TRUE(solutions[i] != nullptr) << "TSP " << i << " returned no result";
+    EXPECT_EQ(solutions[i]->status_, cuopt::routing::solution_status_t::SUCCESS)
+      << "TSP " << i << " (size " << tsp_sizes[i] << ") failed";
+    EXPECT_EQ(solutions[i]->vehicle_count_, 1)
+      << "TSP " << i << " (size " << tsp_sizes[i] << ") used multiple vehicles";
+  }
+}
+
+}  // namespace test
+}  // namespace routing
+}  // namespace cuopt
diff --git a/cpp/tests/routing/unit_tests/vehicle_order_match.cu b/cpp/tests/routing/unit_tests/vehicle_order_match.cu
index ba7dbbf0d..22691b3b8 100644
--- a/cpp/tests/routing/unit_tests/vehicle_order_match.cu
+++ b/cpp/tests/routing/unit_tests/vehicle_order_match.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -48,8 +48,9 @@ TEST(vehicle_order_match, two_vehicle_four_orders)
 
   EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS);
 
-  auto route_id = cuopt::host_copy(routing_solution.get_route());
-  auto truck_id = cuopt::host_copy(routing_solution.get_truck_id());
+  auto stream   = handle.get_stream();
+  auto route_id = cuopt::host_copy(routing_solution.get_route(), stream);
+  auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream);
   for (size_t i = 0; i < route_id.size(); ++i) {
     if (route_id[i] == 3 || route_id[i] == 1) { EXPECT_EQ(truck_id[i], 0); }
   }
@@ -71,12 +72,13 @@ TEST(vehicle_order_match, one_order_per_vehicle)
   raft::handle_t handle;
   cuopt::routing::data_model_view_t<i_t, f_t> data_model(&handle, n_locations, n_vehicles);
 
-  auto time_mat_d = cuopt::device_copy(time_mat, handle.get_stream());
+  auto stream     = handle.get_stream();
+  auto time_mat_d = cuopt::device_copy(time_mat, stream);
   data_model.add_cost_matrix(time_mat_d.data());
 
   std::unordered_map<i_t, rmm::device_uvector<i_t>> vehicle_order_match_d;
   for (const auto& [id, orders] : vehicle_order_match) {
-    vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, handle.get_stream()));
+    vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, stream));
   }
 
   for (const auto& [id, orders] : vehicle_order_match_d) {
@@ -87,8 +89,8 @@ TEST(vehicle_order_match, one_order_per_vehicle)
 
   EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS);
 
-  auto route_id = cuopt::host_copy(routing_solution.get_route());
-  auto truck_id = cuopt::host_copy(routing_solution.get_truck_id());
+  auto route_id = cuopt::host_copy(routing_solution.get_route(), stream);
+  auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream);
   for (size_t i = 0; i < route_id.size(); ++i) {
     auto order   = route_id[i];
     auto vehicle = truck_id[i];
diff --git a/python/cuopt/cuopt/routing/__init__.py b/python/cuopt/cuopt/routing/__init__.py
index efa61b477..081d58f99 100644
--- a/python/cuopt/cuopt/routing/__init__.py
+++ b/python/cuopt/cuopt/routing/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 from cuopt.routing.assignment import Assignment, SolutionStatus
@@ -9,5 +9,5 @@
     update_routes_and_vehicles,
 )
 from cuopt.routing.utils_wrapper import DatasetDistribution
-from cuopt.routing.vehicle_routing import DataModel, Solve, SolverSettings
+from cuopt.routing.vehicle_routing import BatchSolve, DataModel, Solve, SolverSettings
 from cuopt.routing.vehicle_routing_wrapper import ErrorStatus, Objective
diff --git a/python/cuopt/cuopt/routing/vehicle_routing.pxd b/python/cuopt/cuopt/routing/vehicle_routing.pxd
index 4638f8ae7..7f89d33ff 100644
--- a/python/cuopt/cuopt/routing/vehicle_routing.pxd
+++ b/python/cuopt/cuopt/routing/vehicle_routing.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -9,6 +9,7 @@
 
 from libcpp cimport bool
 from libcpp.string cimport string
+from libcpp.vector cimport vector
 
 from pylibraft.common.handle cimport *
 
@@ -133,3 +134,8 @@ cdef extern from "cuopt/routing/cython/cython.hpp" namespace "cuopt::cython": #
         data_model_view_t[int, float]* data_model,
         solver_settings_t[int, float]* solver_settings
     ) except +
+
+    cdef vector[unique_ptr[vehicle_routing_ret_t]] call_batch_solve(
+        vector[data_model_view_t[int, float] *] data_models,
+        solver_settings_t[int, float]* solver_settings
+    ) except +
diff --git a/python/cuopt/cuopt/routing/vehicle_routing.py b/python/cuopt/cuopt/routing/vehicle_routing.py
index 365709147..990283667 100644
--- a/python/cuopt/cuopt/routing/vehicle_routing.py
+++ b/python/cuopt/cuopt/routing/vehicle_routing.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 import numpy as np
@@ -1540,3 +1540,52 @@ def Solve(data_model, solver_settings=None):
             solver_settings.get_config_file_name(),
         )
     return solution
+
+
+@catch_cuopt_exception
+def BatchSolve(data_model_list, solver_settings=None):
+    """
+    Solves multiple routing problems in batch mode using parallel execution.
+
+    Parameters
+    ----------
+    data_model_list: list of DataModel
+        Non-empty list of data model objects, one per routing problem.
+    solver_settings: SolverSettings
+        Settings to configure solver configurations.
+        By default, it uses default solver settings to solve.
+
+    Returns
+    -------
+    list of Assignment
+        Solutions for each routing problem, one entry per element of
+        ``data_model_list``.
+
+    Examples
+    --------
+    >>> from cuopt import routing
+    >>> import cudf
+    >>> # Create multiple data models
+    >>> data_models = []
+    >>> for i in range(5):
+    ...     cost_matrix = cudf.DataFrame([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
+    ...     dm = routing.DataModel(3, 1)
+    ...     dm.add_cost_matrix(cost_matrix)
+    ...     data_models.append(dm)
+    >>> settings = routing.SolverSettings()
+    >>> settings.set_time_limit(1.0)
+    >>> solutions = routing.BatchSolve(data_models, settings)
+    """
+
+    if not isinstance(data_model_list, list):
+        raise ValueError("data_model_list must be a list of DataModel objects")
+    if len(data_model_list) == 0:
+        raise ValueError("data_model_list cannot be empty")
+    if not all(isinstance(dm, DataModel) for dm in data_model_list):
+        raise ValueError(
+            "All elements in data_model_list must be DataModel instances"
+        )
+    if solver_settings is None:
+        solver_settings = SolverSettings()
+
+    return vehicle_routing_wrapper.BatchSolve(data_model_list, solver_settings)
diff --git a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
index bf4a2570c..c1d4bd01a 100644
--- a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
+++ b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -11,6 +11,7 @@ from pylibraft.common.handle cimport *
 
 from cuopt.routing.structure.routing_utilities cimport *
 from cuopt.routing.vehicle_routing cimport (
+    call_batch_solve,
     call_solve,
     data_model_view_t,
     node_type_t,
@@ -32,8 +33,10 @@ from libc.stdlib cimport free, malloc
 from libc.string cimport memcpy, strcpy, strlen
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
+from libcpp.pair cimport pair
 from libcpp.string cimport string
 from libcpp.utility cimport move
+from libcpp.vector cimport vector
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
@@ -834,3 +837,126 @@ def Solve(DataModel data_model, SolverSettings solver_settings):
         error_message,
         unserviced_nodes
     )
+
+
+cdef create_assignment_from_vr_ret(vehicle_routing_ret_t& vr_ret):
+    """
+    Convert a C++ vehicle_routing_ret_t into a Python Assignment.
+
+    The device buffers held by ``vr_ret`` are moved out of it here.
+    """
+    n_vehicles = vr_ret.vehicle_count_
+    objective_total = vr_ret.total_objective_value_
+
+    # Re-key the per-objective values by the Python Objective enum
+    objectives = {}
+    for entry in vr_ret.objective_values_:
+        objectives[Objective(int(entry.first))] = entry.second
+
+    status = vr_ret.status_
+    cdef char* raw_status = c_get_string(vr_ret.solution_string_)
+    try:
+        status_text = \
+            raw_status[:vr_ret.solution_string_.length()].decode('UTF-8')
+    finally:
+        free(raw_status)
+
+    route_buf = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_route_))
+    location_buf = DeviceBuffer.c_from_unique_ptr(
+        move(vr_ret.d_route_locations_)
+    )
+    arrival_buf = DeviceBuffer.c_from_unique_ptr(
+        move(vr_ret.d_arrival_stamp_)
+    )
+    truck_buf = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_truck_id_))
+    type_buf = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_node_types_))
+    unserviced_buf = \
+        DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_unserviced_nodes_))
+    accepted_buf = \
+        DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_accepted_))
+
+    route_df = cudf.DataFrame()
+    route_df['route'] = series_from_buf(route_buf, pa.int32())
+    route_df['arrival_stamp'] = series_from_buf(arrival_buf, pa.float64())
+    route_df['truck_id'] = series_from_buf(truck_buf, pa.int32())
+    route_df['location'] = series_from_buf(location_buf, pa.int32())
+    route_df['type'] = series_from_buf(type_buf, pa.int32())
+
+    unserviced_nodes = cudf.Series._from_column(
+        series_from_buf(unserviced_buf, pa.int32())
+    )
+    accepted = cudf.Series._from_column(
+        series_from_buf(accepted_buf, pa.int32())
+    )
+
+    # Map integer node-type codes to names; codes without an entry give None
+    type_names = {
+        int(NodeType.DEPOT): "Depot",
+        int(NodeType.PICKUP): "Pickup",
+        int(NodeType.DELIVERY): "Delivery",
+        int(NodeType.BREAK): "Break",
+    }
+    route_df['type'] = [
+        type_names.get(code) for code in route_df['type'].to_pandas()
+    ]
+    error_status = vr_ret.error_status_
+    error_message = vr_ret.error_message_
+
+    return Assignment(
+        n_vehicles,
+        objective_total,
+        objectives,
+        route_df,
+        accepted,
+        <solution_status_t> status,
+        status_text,
+        <error_type_t> error_status,
+        error_message,
+        unserviced_nodes
+    )
+
+
+def BatchSolve(py_data_model_list, SolverSettings solver_settings):
+    """
+    Solve multiple routing problems in batch mode using parallel execution.
+
+    Parameters
+    ----------
+    py_data_model_list : list of DataModel
+        List of data model objects representing routing problems to solve.
+        Elements are cast to DataModel without a type check, so callers
+        must validate them beforehand.
+    solver_settings : SolverSettings
+        Solver settings to use for all problems.
+
+    Returns
+    -------
+    list of Assignment
+        Solutions for each routing problem.
+    """
+    cdef solver_settings_t[int, float]* c_solver_settings = (
+        solver_settings.c_solver_settings.get()
+    )
+
+    cdef vector[data_model_view_t[int, float] *] data_model_views
+
+    # Collect the raw C++ views; the Python objects keep owning them
+    for data_model_obj in py_data_model_list:
+        data_model_views.push_back(
+            (<DataModel>data_model_obj).c_data_model_view.get()
+        )
+
+    # The result is a vector of unique_ptr and therefore move-only, so it
+    # is moved straight into the local that is iterated below
+    cdef vector[unique_ptr[vehicle_routing_ret_t]] c_solutions = (
+        move(call_batch_solve(data_model_views, c_solver_settings))
+    )
+
+    # Wrap each C++ result in a Python Assignment
+    solutions = []
+    for i in range(c_solutions.size()):
+        solutions.append(
+            create_assignment_from_vr_ret(c_solutions[i].get()[0])
+        )
+
+    return solutions
diff --git a/python/cuopt/cuopt/tests/routing/test_batch_solve.py b/python/cuopt/cuopt/tests/routing/test_batch_solve.py
new file mode 100644
index 000000000..31d09c202
--- /dev/null
+++ b/python/cuopt/cuopt/tests/routing/test_batch_solve.py
@@ -0,0 +1,67 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import cudf
+import numpy as np
+
+from cuopt import routing
+
+
+def create_tsp_cost_matrix(n_locations):
+    """Builds a symmetric TSP cost matrix where cost(i, j) == |i - j|."""
+    positions = np.arange(n_locations, dtype=np.float32)
+    # Broadcasting a column against a row yields every pairwise difference
+    offsets = positions[:, np.newaxis] - positions[np.newaxis, :]
+    cost_matrix = np.abs(offsets)
+    return cudf.DataFrame(cost_matrix)
+
+
+def test_batch_solve_varying_sizes():
+    """BatchSolve yields one successful single-vehicle solution per TSP."""
+    tsp_sizes = [
+        5,
+        8,
+        10,
+        6,
+        7,
+        9,
+        12,
+        15,
+        11,
+        4,
+        13,
+        14,
+        8,
+        6,
+        10,
+        9,
+        7,
+        11,
+        5,
+        12,
+    ]
+
+    # Build one single-vehicle data model per problem size
+    data_models = []
+    for size in tsp_sizes:
+        costs = create_tsp_cost_matrix(size)
+        model = routing.DataModel(size, 1)
+        model.add_cost_matrix(costs)
+        data_models.append(model)
+
+    # All problems share one settings object
+    solver_settings = routing.SolverSettings()
+    solver_settings.set_time_limit(5.0)
+
+    # Solve the whole batch in a single call
+    solutions = routing.BatchSolve(data_models, solver_settings)
+
+    # Expect exactly one solution per problem, each successful
+    assert len(solutions) == len(tsp_sizes)
+    for index, (size, solution) in enumerate(zip(tsp_sizes, solutions)):
+        # Shared prefix so a failure names the offending problem
+        label = f"TSP {index} (size {size})"
+        assert solution.get_status() == 0, f"{label} failed"
+        assert solution.get_vehicle_count() == 1, (
+            f"{label} used multiple vehicles"
+        )