diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index b512944a6..2a4a781c1 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -57,6 +57,9 @@
 #define CUOPT_MIP_HEURISTICS_ONLY         "mip_heuristics_only"
 #define CUOPT_MIP_SCALING                 "mip_scaling"
 #define CUOPT_MIP_PRESOLVE                "mip_presolve"
+#define CUOPT_MIP_CUT_PASSES              "mip_cut_passes"
+#define CUOPT_MIP_NODE_LIMIT              "mip_node_limit"
+#define CUOPT_MIP_RELIABILITY_BRANCHING   "mip_reliability_branching"
 #define CUOPT_SOLUTION_FILE               "solution_file"
 #define CUOPT_NUM_CPU_THREADS             "num_cpu_threads"
 #define CUOPT_NUM_GPUS                    "num_gpus"
diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
index 4f6320752..65a4d4bd0 100644
--- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
@@ -79,8 +79,11 @@ class mip_solver_settings_t {
   tolerances_t tolerances;
 
   f_t time_limit       = std::numeric_limits<f_t>::infinity();
+  i_t node_limit       = std::numeric_limits<i_t>::max();
+  i_t reliability_branching = -1;
   bool heuristics_only = false;
   i_t num_cpu_threads  = -1;  // -1 means use default number of threads in branch and bound
+  i_t max_cut_passes   = 10;  // number of cut passes to make
   i_t num_gpus         = 1;
   bool log_to_console  = true;
   std::string log_file;
diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt
index e8a9b5dce..4528a9b67 100644
--- a/cpp/src/dual_simplex/CMakeLists.txt
+++ b/cpp/src/dual_simplex/CMakeLists.txt
@@ -10,6 +10,7 @@ set(DUAL_SIMPLEX_SRC_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/cuts.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/folding.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp
@@ -34,7 +35,7 @@ set(DUAL_SIMPLEX_SRC_FILES
   )
 
 # Uncomment to enable debug info
-#set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1")
+set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1")
 
 set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES}
   ${DUAL_SIMPLEX_SRC_FILES} PARENT_SCOPE)
diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp
index db24f55a2..3080f269d 100644
--- a/cpp/src/dual_simplex/basis_solves.cpp
+++ b/cpp/src/dual_simplex/basis_solves.cpp
@@ -613,6 +613,8 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
 template <typename i_t, typename f_t>
 i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                 const std::vector<f_t>& lower,
+                 const std::vector<f_t>& upper,
                  const std::vector<i_t>& deficient,
                  const std::vector<i_t>& slacks_needed,
                  std::vector<i_t>& basis_list,
@@ -658,7 +660,15 @@ i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
     nonbasic_list[nonbasic_map[replace_j]] = bad_j;
     vstatus[replace_j]                     = variable_status_t::BASIC;
     // This is the main issue. What value should bad_j take on.
-    vstatus[bad_j] = variable_status_t::NONBASIC_FREE;
+    if (lower[bad_j] == -inf && upper[bad_j] == inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_FREE;
+    } else if (lower[bad_j] > -inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_LOWER;
+    } else if (upper[bad_j] < inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_UPPER;
+    } else {
+      assert(1 == 0);
+    }
   }
 
   return 0;
@@ -849,6 +859,8 @@ template int factorize_basis<int>(const csc_matrix_t<int, double>& A,
 
 template int basis_repair<int, double>(const csc_matrix_t<int, double>& A,
                                        const simplex_solver_settings_t<int, double>& settings,
+                                       const std::vector<double>& lower,
+                                       const std::vector<double>& upper,
                                        const std::vector<int>& deficient,
                                        const std::vector<int>& slacks_needed,
                                        std::vector<int>& basis_list,
diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp
index b668c0f46..0745806a6 100644
--- a/cpp/src/dual_simplex/basis_solves.hpp
+++ b/cpp/src/dual_simplex/basis_solves.hpp
@@ -42,6 +42,8 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
 template <typename i_t, typename f_t>
 i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                 const std::vector<f_t>& lower,
+                 const std::vector<f_t>& upper,
                  const std::vector<i_t>& deficient,
                  const std::vector<i_t>& slacks_needed,
                  std::vector<i_t>& basis_list,
diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 6b79f3c86..115c00744 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1108,6 +1108,212 @@ i_t basis_update_t<i_t, f_t>::lower_triangular_multiply(const csc_matrix_t<i_t,
   return new_nz;
 }
 
+// Start of middle product form: basis_update_mpf_t
+// append_cuts: grows the stored factorization (L0_, U0_, S_, permutations, workspaces) by one row per row of cuts_basic.
+template <typename i_t, typename f_t>
+i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic)
+{
+  const i_t m = L0_.m;  // dimension of the factorization before the new rows are appended
+
+  // Solve for U^T W^T = C_B^T
+  // We do this one row at a time of C_B
+  csc_matrix_t<i_t, f_t> WT(m, cuts_basic.m, 0);  // W^T: one column per row of cuts_basic
+
+  i_t WT_nz = 0;
+  for (i_t k = 0; k < cuts_basic.m; k++) {
+    sparse_vector_t<i_t, f_t> rhs(cuts_basic, k);
+    u_transpose_solve(rhs);  // rhs is read back below as column k of W^T
+    WT.col_start[k] = WT_nz;
+    for (i_t q = 0; q < rhs.i.size(); q++) {
+      WT.i.push_back(rhs.i[q]);
+      WT.x.push_back(rhs.x[q]);
+      WT_nz++;
+    }
+  }
+  WT.col_start[cuts_basic.m] = WT_nz;
+
+
+#ifdef CHECK_W
+  {
+    for (i_t k = 0; k < cuts_basic.m; k++) {
+      std::vector<f_t> WT_col(m, 0.0);
+      WT.load_a_column(k, WT_col);
+      std::vector<f_t> CBT_col(m, 0.0);
+      matrix_transpose_vector_multiply(U0_, 1.0, WT_col, 0.0, CBT_col);
+      sparse_vector_t<i_t, f_t> CBT_col_sparse(cuts_basic, k);
+      std::vector<f_t> CBT_col_dense(m);
+      CBT_col_sparse.to_dense(CBT_col_dense);
+      for (i_t h = 0; h < m; h++) {
+        if (std::abs(CBT_col_dense[h] - CBT_col[h]) > 1e-6) {
+          printf("col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n", k, h, CBT_col_dense[h], h, CBT_col[h]);
+          exit(1);
+        }
+      }
+    }
+  }
+#endif
+
+  csc_matrix_t<i_t, f_t> V(cuts_basic.m, m, 0);  // block placed below L0 in the extended L
+  if (num_updates_ > 0) {
+    // W = V T_0 ... T_{num_updates_ - 1}
+    // or V = W T_{num_updates_ - 1}^{-1} ... T_0^{-1}
+    // or V^T = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T
+    // We can compute V^T column by column so that we have
+    // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // or
+    // V(h, :) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // So we can form V row by row in CSR and then convert it to CSC
+    // for appending to L0
+
+    csr_matrix_t<i_t, f_t> V_row(cuts_basic.m, m, 0);
+    i_t V_nz           = 0;
+    const f_t zero_tol = 1e-13;  // updates whose |theta| is at or below this are skipped
+    for (i_t h = 0; h < cuts_basic.m; h++) {
+      sparse_vector_t<i_t, f_t> rhs(WT, h);
+      scatter_into_workspace(rhs);  // presumably loads rhs into xi_workspace_/x_workspace_ — TODO confirm
+      i_t nz = rhs.i.size();
+      for (i_t k = num_updates_ - 1; k >= 0; --k) {
+        // T_k^{-T} = ( I - v u^T/(1 + u^T v))
+        // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu
+
+        const i_t u_col = 2 * k;      // u_k is stored in column 2k of S_
+        const i_t v_col = 2 * k + 1;  // v_k is stored in column 2k + 1 of S_
+        const f_t mu    = mu_values_[k];
+
+        // dot = u^T * b
+        f_t dot         = dot_product(u_col, xi_workspace_, x_workspace_);
+        const f_t theta = dot / mu;
+        if (std::abs(theta) > zero_tol) {
+          add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_);
+        }
+      }
+      gather_into_sparse_vector(nz, rhs);  // rhs is read back below as row h of V
+      V_row.row_start[h] = V_nz;
+      for (i_t q = 0; q < rhs.i.size(); q++) {
+        V_row.j.push_back(rhs.i[q]);
+        V_row.x.push_back(rhs.x[q]);
+        V_nz++;
+      }
+    }
+    V_row.row_start[cuts_basic.m] = V_nz;
+
+    V_row.to_compressed_col(V);
+
+
+#ifdef CHECK_V
+    csc_matrix_t<i_t, f_t> CB_col(cuts_basic.m, m, 0);
+    cuts_basic.to_compressed_col(CB_col);
+    for (i_t k = 0; k < m; k++) {
+      std::vector<f_t> U_col(m, 0.0);
+      U0_.load_a_column(k, U_col);
+      for (i_t h = num_updates_ - 1; h >= 0; --h) {
+        // T_h = ( I + u_h v_h^T)
+        // T_h * x = x + u_h * v_h^T * x = x + theta * u_h
+        const i_t u_col = 2 * h;
+        const i_t v_col = 2 * h + 1;
+        f_t theta = dot_product(v_col, U_col);
+        const i_t col_start = S_.col_start[u_col];
+        const i_t col_end = S_.col_start[u_col + 1];
+        for (i_t p = col_start; p < col_end; ++p) {
+          const i_t i = S_.i[p];
+          U_col[i] += theta * S_.x[p];
+        }
+      }
+      std::vector<f_t> CB_column(cuts_basic.m, 0.0);
+      matrix_vector_multiply(V, 1.0, U_col, 0.0, CB_column);
+      std::vector<f_t> CB_col_dense(cuts_basic.m);
+      CB_col.load_a_column(k, CB_col_dense);
+      for (i_t l = 0; l < cuts_basic.m; l++) {
+        if (std::abs(CB_col_dense[l] - CB_column[l]) > 1e-6) {
+          printf("col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n", k, l, CB_col_dense[l], l, CB_column[l]);
+          exit(1);
+        }
+      }
+    }
+#endif
+  } else {
+    // W = V
+    WT.transpose(V);
+  }
+
+  // Extend u_i, v_i for i = 0, ..., num_updates_ - 1
+  S_.m += cuts_basic.m;  // only the row count changes; no entries are added to S_
+
+  // Adjust L and U
+  // L = [ L0  0 ]
+  //     [ V   I ]
+
+  i_t V_nz = V.col_start[m];
+  i_t L_nz = L0_.col_start[m];
+  csc_matrix_t<i_t, f_t> new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m);
+  i_t predicted_nz = L_nz + V_nz + cuts_basic.m;  // nnz(L0) + nnz(V) + one unit diagonal entry per new row
+  L_nz = 0;  // reused as the running write position into new_L
+  for (i_t j = 0; j < m; ++j) {
+    new_L.col_start[j]  = L_nz;
+    const i_t col_start = L0_.col_start[j];
+    const i_t col_end   = L0_.col_start[j + 1];
+    for (i_t p = col_start; p < col_end; ++p) {
+      new_L.i[L_nz] = L0_.i[p];
+      new_L.x[L_nz] = L0_.x[p];
+      L_nz++;
+    }
+    const i_t V_col_start = V.col_start[j];
+    const i_t V_col_end   = V.col_start[j + 1];
+    for (i_t p = V_col_start; p < V_col_end; ++p) {
+      new_L.i[L_nz] = V.i[p] + m;  // rows of V sit below the m rows of L0
+      new_L.x[L_nz] = V.x[p];
+      L_nz++;
+    }
+  }
+  for (i_t j = m; j < m + cuts_basic.m; ++j) {
+    new_L.col_start[j] = L_nz;
+    new_L.i[L_nz]      = j;
+    new_L.x[L_nz]      = 1.0;
+    L_nz++;
+  }
+  new_L.col_start[m + cuts_basic.m] = L_nz;
+  if (L_nz != predicted_nz) {
+    printf("L_nz %d predicted_nz %d\n", L_nz, predicted_nz);
+    exit(1);  // NOTE(review): terminates the whole process from library code — consider an error return
+  }
+
+  L0_ = new_L;
+
+  // Adjust U
+  // U = [ U0 0 ]
+  //     [ 0  I ]
+
+  i_t U_nz = U0_.col_start[m];
+  U0_.col_start.resize(m + cuts_basic.m + 1);
+  U0_.i.resize(U_nz + cuts_basic.m);
+  U0_.x.resize(U_nz + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    U0_.col_start[k] = U_nz;
+    U0_.i[U_nz]      = k;
+    U0_.x[U_nz]      = 1.0;
+    U_nz++;
+  }
+  U0_.col_start[m + cuts_basic.m] = U_nz;
+  U0_.n                           = m + cuts_basic.m;
+  U0_.m                           = m + cuts_basic.m;
+
+  compute_transposes();  // presumably refreshes cached transposes of the enlarged factors — TODO confirm
+
+  // Adjust row_permutation_ and inverse_row_permutation_
+  row_permutation_.resize(m + cuts_basic.m);
+  inverse_row_permutation_.resize(m + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    row_permutation_[k] = k;  // the appended rows map to themselves
+  }
+  inverse_permutation(row_permutation_, inverse_row_permutation_);
+
+  // Adjust workspace sizes
+  xi_workspace_.resize(2 * (m + cuts_basic.m), 0);
+  x_workspace_.resize(m + cuts_basic.m, 0.0);
+
+  return 0;  // the only value returned; the nonzero-count check above exits instead of returning
+}
+
 template <typename i_t, typename f_t>
 void basis_update_mpf_t<i_t, f_t>::gather_into_sparse_vector(i_t nz,
                                                              sparse_vector_t<i_t, f_t>& out) const
@@ -2046,6 +2252,8 @@ template <typename i_t, typename f_t>
 int basis_update_mpf_t<i_t, f_t>::refactor_basis(
   const csc_matrix_t<i_t, f_t>& A,
   const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<f_t>& lower,
+  const std::vector<f_t>& upper,
   std::vector<i_t>& basic_list,
   std::vector<i_t>& nonbasic_list,
   std::vector<variable_status_t>& vstatus)
@@ -2066,7 +2274,8 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
                       deficient,
                       slacks_needed) == -1) {
     settings.log.debug("Initial factorization failed\n");
-    basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(
+      A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
 
 #ifdef CHECK_BASIS_REPAIR
     const i_t m = A.m;
diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp
index cea907074..b69bcfc2a 100644
--- a/cpp/src/dual_simplex/basis_updates.hpp
+++ b/cpp/src/dual_simplex/basis_updates.hpp
@@ -291,6 +291,8 @@ class basis_update_mpf_t {
     reset_stats();
   }
 
+  i_t append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic);
+
   f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const
   {
     num_calls++;
@@ -373,6 +375,8 @@ class basis_update_mpf_t {
   // Compute L*U = A(p, basic_list)
   int refactor_basis(const csc_matrix_t<i_t, f_t>& A,
                      const simplex_solver_settings_t<i_t, f_t>& settings,
+                     const std::vector<f_t>& lower,
+                     const std::vector<f_t>& upper,
                      std::vector<i_t>& basic_list,
                      std::vector<i_t>& nonbasic_list,
                      std::vector<variable_status_t>& vstatus);
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 6161f4d3f..971ec234b 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -5,10 +5,11 @@
  */
 /* clang-format on */
 
-#include <omp.h>
-#include <algorithm>
-#include <dual_simplex/bounds_strengthening.hpp>
 #include <dual_simplex/branch_and_bound.hpp>
+
+#include <dual_simplex/basis_solves.hpp>
+#include <dual_simplex/cuts.hpp>
+#include <dual_simplex/bounds_strengthening.hpp>
 #include <dual_simplex/crossover.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/logger.hpp>
@@ -20,6 +21,9 @@
 #include <dual_simplex/tic_toc.hpp>
 #include <dual_simplex/user_problem.hpp>
 
+#include <omp.h>
+
+#include <algorithm>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
@@ -224,9 +228,39 @@ branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
 {
   exploration_stats_.start_time = tic();
   dualize_info_t<i_t, f_t> dualize_info;
+#ifdef PRINT_A
+  settings_.log.printf("A");
+  original_problem_.A.print_matrix();
+#endif
   convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info);
   full_variable_types(original_problem_, original_lp_, var_types_);
 
+  num_integer_variables_ = 0;
+  for (i_t j = 0; j < original_lp_.num_cols; j++) {
+    if (var_types_[j] == variable_type_t::INTEGER) {
+      num_integer_variables_++;
+    }
+  }
+  printf("num_integer_variables %d\n", num_integer_variables_);
+
+  // Check slack
+  printf("slacks size %ld m %d\n", new_slacks_.size(), original_lp_.num_rows);
+  for (i_t slack : new_slacks_) {
+    const i_t col_start = original_lp_.A.col_start[slack];
+    const i_t col_end = original_lp_.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Slack %d has %d nzs\n", slack, col_len);
+      exit(1);
+    }
+    const i_t i = original_lp_.A.i[col_start];
+    const f_t x = original_lp_.A.x[col_start];
+    if (std::abs(x) != 1.0) {
+      printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x);
+      exit(1);
+    }
+  }
+
   mutex_upper_.lock();
   upper_bound_ = inf;
   mutex_upper_.unlock();
@@ -265,9 +299,88 @@ i_t branch_and_bound_t<i_t, f_t>::get_heap_size()
   return size;
 }
 
+// Reduced-cost bound tightening against a new incumbent objective value `upper_bound`.
+// With gap = upper_bound - root objective, a column whose root reduced cost z_j has magnitude
+// above `threshold` yields the candidate bound x_j <= lower_j + gap / z_j (z_j > 0) or
+// x_j >= upper_j + gap / z_j (z_j < 0); integer columns are rounded after relaxing by `weaken`.
+// Results are only counted and logged; original_lp_ is left unchanged by this function.
+template <typename i_t, typename f_t>
+void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
+{
+  settings_.log.printf("Finding reduced cost fixings\n");
+  mutex_original_lp_.lock();
+  const i_t num_cols                    = original_lp_.num_cols;
+  const std::vector<f_t>& reduced_costs = root_relax_soln_.z;
+  // Indexing below needs a root solution that covers every column; otherwise derive nothing.
+  if (reduced_costs.size() < static_cast<size_t>(num_cols) ||
+      root_relax_soln_.x.size() < static_cast<size_t>(num_cols)) {
+    mutex_original_lp_.unlock();
+    return;
+  }
+  std::vector<f_t> lower_bounds = original_lp_.lower;
+  std::vector<f_t> upper_bounds = original_lp_.upper;
+  const f_t root_obj            = compute_objective(original_lp_, root_relax_soln_.x);
+  const f_t abs_gap             = upper_bound - root_obj;  // identical for every column
+  const f_t threshold           = 1e-3;  // smaller reduced costs are skipped
+  const f_t weaken              = 1e-5;  // slack applied before rounding integer bounds
+  i_t num_improved              = 0;
+  i_t num_fixed                 = 0;
+  for (i_t j = 0; j < num_cols; j++) {
+    const f_t z_j = reduced_costs[j];
+    if (!(std::abs(z_j) > threshold)) { continue; }
+    const bool is_integer        = var_types_[j] == variable_type_t::INTEGER;
+    const f_t lower_j            = original_lp_.lower[j];
+    const f_t upper_j            = original_lp_.upper[j];
+    f_t reduced_cost_upper_bound = upper_j;
+    f_t reduced_cost_lower_bound = lower_j;
+    if (lower_j > -inf && z_j > 0) {
+      const f_t new_upper_bound = lower_j + abs_gap / z_j;
+      reduced_cost_upper_bound = is_integer ? std::floor(new_upper_bound + weaken) : new_upper_bound;
+      if (reduced_cost_upper_bound < upper_j) {
+        num_improved++;
+        upper_bounds[j] = reduced_cost_upper_bound;
+      }
+    }
+    if (upper_j < inf && z_j < 0) {
+      const f_t new_lower_bound = upper_j + abs_gap / z_j;
+      reduced_cost_lower_bound = is_integer ? std::ceil(new_lower_bound - weaken) : new_lower_bound;
+      if (reduced_cost_lower_bound > lower_j) {
+        num_improved++;
+        lower_bounds[j] = reduced_cost_lower_bound;
+      }
+    }
+    if (is_integer && reduced_cost_upper_bound <= reduced_cost_lower_bound) { num_fixed++; }
+  }
+  // Report 0% instead of 0/0 when the problem has no integer columns.
+  const double pct_fixed =
+    num_integer_variables_ > 0 ? 100.0 * num_fixed / num_integer_variables_ : 0.0;
+  settings_.log.printf("Reduced costs: Found %d improved bounds and %d fixed variables (%.1f%%)\n",
+                       num_improved, num_fixed, pct_fixed);
+  // Propagate the tightened bounds on a scratch copy of the LP; the outcome is only reported.
+  if (num_improved > 0) {
+    lp_problem_t<i_t, f_t> new_lp = original_lp_;
+    new_lp.lower                  = lower_bounds;
+    new_lp.upper                  = upper_bounds;
+    std::vector<char> row_sense;
+    csr_matrix_t<i_t, f_t> Arow(1, 1, 1);
+    original_lp_.A.to_compressed_row(Arow);
+    bounds_strengthening_t<i_t, f_t> node_presolve(new_lp, Arow, row_sense, var_types_);
+    const bool feasible = node_presolve.bounds_strengthening(new_lp.lower, new_lp.upper, settings_);
+    if (!feasible) { settings_.log.printf("Bound strengthening: reported infeasible\n"); }
+    num_improved = 0;
+    for (i_t j = 0; j < num_cols; j++) {
+      if (new_lp.lower[j] > original_lp_.lower[j]) { num_improved++; }
+      if (new_lp.upper[j] < original_lp_.upper[j]) { num_improved++; }
+    }
+    settings_.log.printf("Bound strengthening: Found %d improved bounds\n", num_improved);
+  }
+  mutex_original_lp_.unlock();
+}
+
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solution)
 {
+  mutex_original_lp_.lock();
   if (solution.size() != original_problem_.num_cols) {
     settings_.log.printf(
       "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols);
@@ -276,16 +389,22 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
   crush_primal_solution<i_t, f_t>(
     original_problem_, original_lp_, solution, new_slacks_, crushed_solution);
   f_t obj             = compute_objective(original_lp_, crushed_solution);
+  mutex_original_lp_.unlock();
   bool is_feasible    = false;
   bool attempt_repair = false;
   mutex_upper_.lock();
-  if (obj < upper_bound_) {
+  f_t current_upper_bound = upper_bound_;
+  mutex_upper_.unlock();
+  if (obj < current_upper_bound) {
     f_t primal_err;
     f_t bound_err;
     i_t num_fractional;
+    mutex_original_lp_.lock();
     is_feasible = check_guess(
       original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional);
-    if (is_feasible) {
+    mutex_original_lp_.unlock();
+    mutex_upper_.lock();
+    if (is_feasible && obj < upper_bound_) {
       upper_bound_ = obj;
       incumbent_.set_incumbent_solution(obj, crushed_solution);
     } else {
@@ -300,8 +419,8 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
           num_fractional);
       }
     }
+    mutex_upper_.unlock();
   }
-  mutex_upper_.unlock();
 
   if (is_feasible) {
     if (solver_status_ == mip_exploration_status_t::RUNNING) {
@@ -310,11 +429,13 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
       std::string gap = user_mip_gap<f_t>(user_obj, user_lower);
 
       settings_.log.printf(
-        "H                           %+13.6e    %+10.6e                        %s %9.2f\n",
+        "H                           %+13.6e    %+10.6e                               %s %9.2f\n",
         user_obj,
         user_lower,
         gap.c_str(),
         toc(exploration_stats_.start_time));
+
+      find_reduced_cost_fixings(obj);
     } else {
       settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n",
                            compute_user_objective(original_lp_, obj),
@@ -423,7 +544,7 @@ void branch_and_bound_t<i_t, f_t>::repair_heuristic_solutions()
           std::string user_gap = user_mip_gap<f_t>(obj, lower);
 
           settings_.log.printf(
-            "H                           %+13.6e    %+10.6e                        %s %9.2f\n",
+            "H                           %+13.6e    %+10.6e                              %s %9.2f\n",
             obj,
             lower,
             user_gap.c_str(),
@@ -434,6 +555,8 @@ void branch_and_bound_t<i_t, f_t>::repair_heuristic_solutions()
             uncrush_primal_solution(original_problem_, original_lp_, repaired_solution, original_x);
             settings_.solution_callback(original_x, repaired_obj);
           }
+
+          find_reduced_cost_fixings(obj);
         }
 
         mutex_upper_.unlock();
@@ -480,6 +603,31 @@ mip_status_t branch_and_bound_t<i_t, f_t>::set_final_solution(mip_solution_t<i_t
 
   if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) {
     mip_status = mip_status_t::OPTIMAL;
+#if 1
+    // Debug dump: writes the incumbent to solution.dat in the working directory and prints a hash.
+    FILE* fid = fopen("solution.dat", "w");
+    if (fid != nullptr) {
+      printf("Writing solution.dat\n");
+      std::vector<f_t> residual = original_lp_.rhs;
+      matrix_vector_multiply(original_lp_.A, 1.0, incumbent_.x, -1.0, residual);
+      printf("|| A*x - b ||_inf %e\n", vector_norm_inf<i_t, f_t>(residual));
+      auto hash_combine_f = [](size_t seed, f_t x) {
+        seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        return seed;
+      };
+      // size_t arguments need the z length modifier (%zu / %zx); plain %x expects an unsigned int.
+      printf("incumbent size %zu original lp cols %d\n", incumbent_.x.size(), original_lp_.num_cols);
+      i_t n = original_lp_.num_cols;
+      size_t seed = n;
+      fprintf(fid, "%d\n", n);
+      for (i_t j = 0; j < n; ++j) {
+        fprintf(fid, "%.17g\n", incumbent_.x[j]);
+        seed = hash_combine_f(seed, incumbent_.x[j]);
+      }
+      printf("Solution hash: %20zx\n", seed);
+      fclose(fid);
+    }
+#endif
     if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) {
       settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n",
                            settings_.absolute_mip_gap_tol);
@@ -534,17 +682,19 @@ void branch_and_bound_t<i_t, f_t>::add_feasible_solution(f_t leaf_objective,
     f_t lower_bound = get_lower_bound();
     f_t obj         = compute_user_objective(original_lp_, upper_bound_);
     f_t lower       = compute_user_objective(original_lp_, lower_bound);
-    settings_.log.printf(
-      "%s%10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
-      feasible_solution_symbol(thread_type),
-      nodes_explored,
-      nodes_unexplored,
-      obj,
-      lower,
-      leaf_depth,
-      nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
-      user_mip_gap<f_t>(obj, lower).c_str(),
-      toc(exploration_stats_.start_time));
+    settings_.log.printf("%s%10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
+                         feasible_solution_symbol(thread_type),
+                         nodes_explored,
+                         nodes_unexplored,
+                         obj,
+                         lower,
+                         0,
+                         leaf_depth,
+                         nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
+                         user_mip_gap<f_t>(obj, lower).c_str(),
+                         toc(exploration_stats_.start_time));
+
+    find_reduced_cost_fixings(upper_bound_);
 
     send_solution = true;
   }
@@ -594,6 +744,16 @@ node_solve_info_t branch_and_bound_t<i_t, f_t>::solve_node(
   const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10;
   const f_t upper_bound    = get_upper_bound();
 
+  if (node_ptr->depth > num_integer_variables_) {
+    printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_);
+    mip_node_t<i_t, f_t>* parent = node_ptr->parent;
+    while (parent != nullptr) {
+      printf("Parent depth %d\n", parent->depth);
+      printf("Parent branch var %d dir %d lower %e upper %e\n", parent->branch_var, parent->branch_dir, parent->branch_var_lower, parent->branch_var_upper);
+      parent = parent->parent;
+    }
+  }
+
   lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
   std::vector<variable_status_t>& leaf_vstatus = node_ptr->vstatus;
   assert(leaf_vstatus.size() == leaf_problem.num_cols);
@@ -644,11 +804,11 @@ node_solve_info_t branch_and_bound_t<i_t, f_t>::solve_node(
     node_presolver.bounds_strengthening(leaf_problem.lower, leaf_problem.upper, lp_settings);
 
   dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED;
+  std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
 
   if (feasible) {
     i_t node_iter                    = 0;
     f_t lp_start_time                = tic();
-    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
 
     lp_status = dual_phase2_with_advanced_basis(2,
                                                 0,
@@ -706,6 +866,23 @@ node_solve_info_t branch_and_bound_t<i_t, f_t>::solve_node(
     i_t leaf_num_fractional =
       fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional);
 
+    // Check if any of the fractional variables were fixed to their bounds
+    for (i_t j : leaf_fractional)
+    {
+      if (leaf_problem.lower[j] == leaf_problem.upper[j])
+      {
+        printf(
+          "Node %d: Fixed variable %d has a fractional value %e. Lower %e upper %e. Variable status %d\n",
+          node_ptr->node_id,
+          j,
+          leaf_solution.x[j],
+          leaf_problem.lower[j],
+          leaf_problem.upper[j],
+          leaf_vstatus[j]);
+      }
+    }
+
+
     f_t leaf_objective    = compute_objective(leaf_problem, leaf_solution.x);
     node_ptr->lower_bound = leaf_objective;
     search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective);
@@ -726,12 +903,25 @@ node_solve_info_t branch_and_bound_t<i_t, f_t>::solve_node(
 
     } else if (leaf_objective <= upper_bound + abs_fathom_tol) {
       // Choose fractional variable to branch on
-      const i_t branch_var =
-        pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log);
+
+      i_t branch_var = -1;
+      if (lp_settings.reliability_branching > 0) {
+        branch_var = pc_.reliable_variable_selection(leaf_problem,
+                                                     lp_settings,
+                                                     var_types_,
+                                                     leaf_vstatus,
+                                                     leaf_edge_norms,
+                                                     leaf_fractional,
+                                                     leaf_solution.x,
+                                                     leaf_objective,
+                                                     lp_settings.log);
+      } else {
+        branch_var = pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log);
+      }
 
       assert(leaf_vstatus.size() == leaf_problem.num_cols);
       search_tree.branch(
-        node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, leaf_problem, log);
+        node_ptr, branch_var, leaf_solution.x[branch_var], leaf_num_fractional, leaf_vstatus, leaf_problem, log);
       search_tree.update(node_ptr, node_status_t::HAS_CHILDREN);
 
       rounding_direction_t round_dir = child_selection(node_ptr);
@@ -811,16 +1001,16 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(mip_node_t<i_t, f_t>* nod
       i_t nodes_explored   = exploration_stats_.nodes_explored;
       i_t nodes_unexplored = exploration_stats_.nodes_unexplored;
 
-      settings_.log.printf(
-        " %10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
-        nodes_explored,
-        nodes_unexplored,
-        obj,
-        user_lower,
-        node->depth,
-        nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
-        gap_user.c_str(),
-        now);
+      settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
+                           nodes_explored,
+                           nodes_unexplored,
+                           obj,
+                           user_lower,
+                           node->integer_infeasible,
+                           node->depth,
+                           nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
+                           gap_user.c_str(),
+                           now);
 
       exploration_stats_.nodes_since_last_log = 0;
       exploration_stats_.last_log             = tic();
@@ -941,17 +1131,16 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t task_id,
         std::string gap_user = user_mip_gap<f_t>(obj, user_lower);
         i_t nodes_explored   = exploration_stats_.nodes_explored;
         i_t nodes_unexplored = exploration_stats_.nodes_unexplored;
-
-        settings_.log.printf(
-          " %10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
-          nodes_explored,
-          nodes_unexplored,
-          obj,
-          user_lower,
-          node_ptr->depth,
-          nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
-          gap_user.c_str(),
-          now);
+        settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
+                             nodes_explored,
+                             nodes_unexplored,
+                             obj,
+                             user_lower,
+                             node_ptr->integer_infeasible,
+                             node_ptr->depth,
+                             nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0,
+                             gap_user.c_str(),
+                             now);
         exploration_stats_.last_log             = tic();
         exploration_stats_.nodes_since_last_log = 0;
       }
@@ -1212,19 +1401,28 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
 
 template <typename i_t, typename f_t>
 lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
-  simplex_solver_settings_t<i_t, f_t> const& lp_settings)
+  simplex_solver_settings_t<i_t, f_t> const& lp_settings,
+  lp_solution_t<i_t, f_t>& root_relax_soln,
+  std::vector<variable_status_t>& root_vstatus,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  std::vector<i_t>& basic_list,
+  std::vector<i_t>& nonbasic_list,
+  std::vector<f_t>& edge_norms)
 {
   // Root node path
   lp_status_t root_status;
   std::future<lp_status_t> root_status_future;
   root_status_future = std::async(std::launch::async,
-                                  &solve_linear_program_advanced<i_t, f_t>,
+                                  &solve_linear_program_with_advanced_basis<i_t, f_t>,
                                   std::ref(original_lp_),
                                   exploration_stats_.start_time,
                                   std::ref(lp_settings),
-                                  std::ref(root_relax_soln_),
-                                  std::ref(root_vstatus_),
-                                  std::ref(edge_norms_));
+                                  std::ref(root_relax_soln),
+                                  std::ref(basis_update),
+                                  std::ref(basic_list),
+                                  std::ref(nonbasic_list),
+                                  std::ref(root_vstatus),
+                                  std::ref(edge_norms));
   // Wait for the root relaxation solution to be sent by the diversity manager or dual simplex
   // to finish
   while (!root_crossover_solution_set_.load(std::memory_order_acquire) &&
@@ -1273,13 +1471,46 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
       set_root_concurrent_halt(1);  // Stop dual simplex
       root_status = root_status_future.get();
       // Override the root relaxation solution with the crossover solution
-      root_relax_soln_ = root_crossover_soln_;
-      root_vstatus_    = crossover_vstatus_;
+      root_relax_soln = root_crossover_soln_;
+      root_vstatus    = crossover_vstatus_;
       root_status      = lp_status_t::OPTIMAL;
+      // Rebuild both lists from the crossover vstatus; clear first since they arrive non-empty.
+      basic_list.clear();
+      nonbasic_list.clear();
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        auto& list = crossover_vstatus_[j] == variable_status_t::BASIC ? basic_list : nonbasic_list;
+        list.push_back(j);
+      }
+      const i_t num_basic    = static_cast<i_t>(basic_list.size());
+      const i_t num_nonbasic = static_cast<i_t>(nonbasic_list.size());
+      if (num_basic != original_lp_.num_rows) {
+        printf("basic_list size %d != m %d\n", num_basic, original_lp_.num_rows);
+        exit(1);
+      }
+      if (num_nonbasic != original_lp_.num_cols - original_lp_.num_rows) {
+        printf("nonbasic_list size %d != n - m %d\n", num_nonbasic, original_lp_.num_cols - original_lp_.num_rows);
+        exit(1);
+      }
+      root_crossover_settings.max_cut_passes = 3;
+      // Populate the basis_update from the crossover vstatus
+      basis_update.refactor_basis(original_lp_.A,
+                                  root_crossover_settings,
+                                  original_lp_.lower,
+                                  original_lp_.upper,
+                                  basic_list,
+                                  nonbasic_list,
+                                  crossover_vstatus_);
+
+      // Set the edge norms to a default value
+      edge_norms.resize(original_lp_.num_cols, -1.0);
+      set_uninitialized_steepest_edge_norms<i_t, f_t>(edge_norms);
+      settings_.log.debug("Using crossover solution\n");
     } else {
+      settings_.log.debug("Using dual simplex solution 1: crossover status %d\n", static_cast<int>(crossover_status));
       root_status = root_status_future.get();
     }
   } else {
+    settings_.log.debug("Using dual simplex solution\n");
     root_status = root_status_future.get();
   }
   return root_status;
@@ -1295,6 +1526,8 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   exploration_stats_.nodes_unexplored = 0;
   exploration_stats_.nodes_explored   = 0;
 
+  printf("Branch and bound solve called\n");
+
   if (guess_.size() != 0) {
     std::vector<f_t> crushed_guess;
     crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess);
@@ -1315,24 +1548,36 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols);
 
   settings_.log.printf("Solving LP root relaxation\n");
-
-  lp_status_t root_status;
+  i_t original_rows = original_lp_.num_rows;
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.inside_mip                = 1;
-  lp_settings.concurrent_halt           = get_root_concurrent_halt();
-  // RINS/SUBMIP path
+  lp_settings.scale_columns = false;
+  lp_settings.concurrent_halt = get_root_concurrent_halt();
+  std::vector<i_t> basic_list(original_lp_.num_rows);
+  std::vector<i_t> nonbasic_list;
+  basis_update_mpf_t<i_t, f_t> basis_update(original_lp_.num_rows, settings_.refactor_frequency);
+  lp_status_t root_status;
   if (!enable_concurrent_lp_root_solve()) {
-    root_status = solve_linear_program_advanced(original_lp_,
-                                                exploration_stats_.start_time,
-                                                lp_settings,
-                                                root_relax_soln_,
-                                                root_vstatus_,
-                                                edge_norms_);
-
+    printf("Non concurrent LP root solve\n");
+    // RINS/SUBMIP path
+    root_status = solve_linear_program_with_advanced_basis(original_lp_,
+                                                           exploration_stats_.start_time,
+                                                           lp_settings,
+                                                           root_relax_soln_,
+                                                           basis_update,
+                                                           basic_list,
+                                                           nonbasic_list,
+                                                           root_vstatus_,
+                                                           edge_norms_);
   } else {
-    root_status = solve_root_relaxation(lp_settings);
+    root_status = solve_root_relaxation(lp_settings,
+                                        root_relax_soln_,
+                                        root_vstatus_,
+                                        basis_update,
+                                        basic_list,
+                                        nonbasic_list,
+                                        edge_norms_);
   }
-
   exploration_stats_.total_lp_iters      = root_relax_soln_.iterations;
   exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time);
 
@@ -1381,31 +1626,357 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   }
 
   std::vector<i_t> fractional;
-  const i_t num_fractional =
+  i_t num_fractional =
     fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
 
-  if (num_fractional == 0) {
-    mutex_upper_.lock();
-    incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
-    upper_bound_ = root_objective_;
-    mutex_upper_.unlock();
-    // We should be done here
-    uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
-    solution.objective          = incumbent_.objective;
-    solution.lower_bound        = root_objective_;
-    solution.nodes_explored     = 0;
-    solution.simplex_iterations = root_relax_soln_.iterations;
-    settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n",
-                         compute_user_objective(original_lp_, root_objective_),
-                         toc(exploration_stats_.start_time));
+  csr_matrix_t<i_t, f_t> Arow(1, 1, 1);
+  original_lp_.A.to_compressed_row(Arow);
 
-    if (settings_.solution_callback != nullptr) {
-      settings_.solution_callback(solution.x, solution.objective);
+  solver_status_                     = mip_exploration_status_t::RUNNING;
+  lower_bound_ceiling_        = inf;
+
+  if (num_fractional != 0) {
+    settings_.log.printf(
+      " | Explored | Unexplored |    Objective    |     Bound     | IntInf | Depth | Iter/Node |   Gap    "
+      "|  Time  |\n");
+  }
+
+  cut_pool_t<i_t, f_t> cut_pool(original_lp_.num_cols, settings_);
+  cut_generation_t<i_t, f_t> cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_);
+
+  std::vector<f_t> saved_solution;
+#ifdef CHECK_SAVED_SOLUTION  // debug aid: reads solution.dat from the working directory
+  printf("Trying to open solution.dat\n");
+  FILE* fid = NULL;
+  fid = fopen("solution.dat", "r");
+  if (fid != NULL)
+  {
+    i_t n_solution_dat;
+    i_t count = fscanf(fid, "%d\n", &n_solution_dat);
+    printf("Solution.dat variables %d =? %d =? %ld count %d\n", n_solution_dat, original_lp_.num_cols, solution.x.size(), count);
+    bool good = true;
+    if (count == 1 && n_solution_dat == original_lp_.num_cols)
+    {
+      printf("Opened solution.dat with %d number of variables\n", n_solution_dat);
+      saved_solution.resize(n_solution_dat);
+      for (i_t j = 0; j < n_solution_dat; j++) {
+        double value = 0.0;  // "%lf" requires a double*, whatever f_t is
+        count        = fscanf(fid, "%lf", &value);
+        if (count != 1) {
+          printf("bad read solution.dat: j %d count %d\n", j, count);
+          good = false;
+          break;
+        }
+        saved_solution[j] = static_cast<f_t>(value);
+      }
+    } else {
+      good = false;
     }
-    if (settings_.heuristic_preemption_callback != nullptr) {
-      settings_.heuristic_preemption_callback();
+    fclose(fid);
+
+    if (!good)
+    {
+      saved_solution.resize(0);
+      printf("Solution.dat is bad\n");
+    }
+    else
+    {
+      printf("Read solution file\n");
+
+      // Fold every saved value into a single size_t fingerprint and print it in hex.
+      auto hash_combine_f = [](size_t seed, f_t x) {
+        seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        return seed;
+      };
+      size_t seed = original_lp_.num_cols;
+      for (i_t j = 0; j < original_lp_.num_cols; ++j) {
+        seed = hash_combine_f(seed, saved_solution[j]);
+      }
+      printf("Saved solution hash: %20zx\n", seed);  // %zx because seed is a size_t
+
+      FILE* fid = NULL;
+      fid       = fopen("solution.dat.2", "w");
+      if (fid != NULL) {
+        printf("Writing solution.dat.2\n");
+        i_t n = original_lp_.num_cols;
+        size_t seed = n;
+        fprintf(fid, "%d\n", n);
+        for (i_t j = 0; j < n; ++j) {
+          fprintf(fid, "%.17g\n", saved_solution[j]);
+        }
+        fclose(fid);
+      }
+
+      // Compute || A * x - b ||_inf
+      std::vector<f_t> residual = original_lp_.rhs;
+      matrix_vector_multiply(original_lp_.A, 1.0, saved_solution, -1.0, residual);
+      printf("Saved solution: || A*x - b ||_inf %e\n", vector_norm_inf<i_t, f_t>(residual));
+      f_t infeas = 0;
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (saved_solution[j] < original_lp_.lower[j] - 1e-6) {
+          f_t curr_infeas = (original_lp_.lower[j] - saved_solution[j]);
+          infeas += curr_infeas;
+          printf(
+            "j: %d saved solution %e lower %e\n", j, saved_solution[j], original_lp_.lower[j]);
+        }
+        if (saved_solution[j] > original_lp_.upper[j] + 1e-6) {
+          f_t curr_infeas = (saved_solution[j] - original_lp_.upper[j]);
+          infeas += curr_infeas;
+          printf(
+            "j %d saved solution %e upper %e\n", j, saved_solution[j], original_lp_.upper[j]);
+        }
+      }
+      printf("Bound infeasibility %e\n", infeas);
+    }
+  } else {
+    printf("Could not open solution.dat\n");
+  }
+#endif
+
+  i_t num_gomory_cuts = 0;
+  i_t num_mir_cuts = 0;
+  i_t num_knapsack_cuts = 0;
+  i_t cut_pool_size = 0;
+  for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
+    if (num_fractional == 0) {
+#ifdef PRINT_SOLUTION
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (var_types_[j] == variable_type_t::INTEGER) {
+          settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]);
+        }
+      }
+#endif
+      mutex_upper_.lock();
+      incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
+      upper_bound_ = root_objective_;
+      mutex_upper_.unlock();
+      // We should be done here
+      uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
+      solution.objective          = incumbent_.objective;
+      solution.lower_bound        = root_objective_;
+      solution.nodes_explored     = 0;
+      solution.simplex_iterations = root_relax_soln_.iterations;
+      settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n",
+                           compute_user_objective(original_lp_, root_objective_),
+                           toc(exploration_stats_.start_time));
+
+      if (settings_.solution_callback != nullptr) {
+        settings_.solution_callback(solution.x, solution.objective);
+      }
+      if (settings_.heuristic_preemption_callback != nullptr) {
+        settings_.heuristic_preemption_callback();
+      }
+      return mip_status_t::OPTIMAL;
+    } else {
+#ifdef PRINT_FRACTIONAL_INFO
+      settings_.log.printf("Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass);
+      for (i_t j: fractional) {
+        settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]);
+      }
+#endif
+
+      // Generate cuts and add them to the cut pool
+      cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
+
+      // Score the cuts
+      cut_pool.score_cuts(root_relax_soln_.x);
+      // Get the best cuts from the cut pool
+      csr_matrix_t<i_t, f_t> cuts_to_add(0, original_lp_.num_cols, 0);
+      std::vector<f_t> cut_rhs;
+      std::vector<cut_type_t> cut_types;
+      i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
+      if (num_cuts == 0)
+      {
+        //settings_.log.printf("No cuts found\n");
+        break;
+      }
+      for (i_t k = 0; k < cut_types.size(); k++) {
+        if (cut_types[k] == cut_type_t::MIXED_INTEGER_GOMORY) {
+          num_gomory_cuts++;
+        } else if (cut_types[k] == cut_type_t::MIXED_INTEGER_ROUNDING) {
+          num_mir_cuts++;
+        } else if (cut_types[k] == cut_type_t::KNAPSACK) {
+          num_knapsack_cuts++;
+        }
+      }
+
+      cuts_to_add.check_matrix();
+
+#ifdef PRINT_CUTS
+      csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
+      cuts_to_add.to_compressed_col(cuts_to_add_col);
+      cuts_to_add_col.print_matrix();
+      for (i_t i = 0; i < cut_rhs.size(); i++) {
+        printf("cut_rhs[%d] = %g\n", i, cut_rhs[i]);
+      }
+#endif
+
+#if 0
+      f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x);
+      if (min_cut_violation < 1e-6) {
+        settings_.log.printf("Min cut violation %e\n", min_cut_violation);
+      }
+#endif
+
+      // Check against saved solution
+      if (saved_solution.size() > 0) {
+        csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
+        cuts_to_add.to_compressed_col(cuts_to_add_col);
+        std::vector<f_t> Cx(cuts_to_add.m);
+        matrix_vector_multiply(cuts_to_add_col, 1.0, saved_solution, 0.0, Cx);
+        for (i_t k = 0; k < num_cuts; k++) {
+          //printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]);
+          if (Cx[k] > cut_rhs[k] + 1e-6) {
+            printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e\n", k, Cx[k], cut_rhs[k]);
+            exit(1);
+          }
+        }
+      }
+
+      cut_pool_size = cut_pool.pool_size();
+
+      // Resolve the LP with the new cuts
+      settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+                           num_cuts,
+                           cuts_to_add.row_start[cuts_to_add.m],
+                           cut_pool.pool_size(),
+                           cuts_to_add.m + original_lp_.num_rows);
+      lp_settings.log.log = false;
+
+      mutex_original_lp_.lock();
+      i_t add_cuts_status = add_cuts(settings_,
+                                     cuts_to_add,
+                                     cut_rhs,
+                                     original_lp_,
+                                     new_slacks_,
+                                     root_relax_soln_,
+                                     basis_update,
+                                     basic_list,
+                                     nonbasic_list,
+                                     root_vstatus_,
+                                     edge_norms_);
+      mutex_original_lp_.unlock();
+      if (add_cuts_status != 0) {
+        settings_.log.printf("Failed to add cuts\n");
+        exit(1);
+      }
+
+      // Try to do bound strengthening
+      var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS);
+
+      std::vector<bool> bounds_changed(original_lp_.num_cols, true);
+      std::vector<char> row_sense;
+#ifdef CHECK_MATRICES
+      settings_.log.printf("Before A check\n");
+      original_lp_.A.check_matrix();
+#endif
+      original_lp_.A.to_compressed_row(Arow);
+#if 1
+      bounds_strengthening_t<i_t, f_t> node_presolve(original_lp_, Arow, row_sense, var_types_);
+      bool feasible = node_presolve.bounds_strengthening(original_lp_.lower, original_lp_.upper, settings_);
+
+      if (!feasible) {
+        settings_.log.printf("Bound strengthening failed\n");
+        exit(1);
+      }
+#endif
+
+      // Adjust the solution
+      root_relax_soln_.x.resize(original_lp_.num_cols, 0.0);
+      root_relax_soln_.y.resize(original_lp_.num_rows, 0.0);
+      root_relax_soln_.z.resize(original_lp_.num_cols, 0.0);
+
+      // For now just clear the edge norms
+      edge_norms_.clear();
+      i_t iter              = 0;
+      bool initialize_basis = false;
+      lp_settings.concurrent_halt = NULL;
+      dual::status_t cut_status = dual_phase2_with_advanced_basis(2,
+                                                                  0,
+                                                                  initialize_basis,
+                                                                  exploration_stats_.start_time,
+                                                                  original_lp_,
+                                                                  lp_settings,
+                                                                  root_vstatus_,
+                                                                  basis_update,
+                                                                  basic_list,
+                                                                  nonbasic_list,
+                                                                  root_relax_soln_,
+                                                                  iter,
+                                                                  edge_norms_);
+
+      settings_.log.debug("Cut LP iterations %d. A nz %d\n",
+                           iter,
+                           original_lp_.A.col_start[original_lp_.A.n]);
+      exploration_stats_.total_lp_iters += root_relax_soln_.iterations;
+      root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
+
+      if (cut_status != dual::status_t::OPTIMAL) {
+        settings_.log.printf("Cut status %d\n", cut_status);
+        exit(1);
+      }
+
+      local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_);
+
+      mutex_original_lp_.lock();
+      remove_cuts(original_lp_,
+                  settings_,
+                  Arow,
+                  new_slacks_,
+                  original_rows,
+                  var_types_,
+                  root_vstatus_,
+                  root_relax_soln_.x,
+                  root_relax_soln_.y,
+                  root_relax_soln_.z,
+                  basic_list,
+                  nonbasic_list,
+                  basis_update);
+      mutex_original_lp_.unlock();
+
+      fractional.clear();
+      num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
+
+      // TODO: Get upper bound from heuristics
+      // obj stays in solver space here so the user-space conversion below is applied exactly once.
+      const f_t upper_bound = get_upper_bound();
+      const f_t obj         = num_fractional != 0 ? upper_bound : static_cast<f_t>(root_objective_);
+      const f_t user_obj    = compute_user_objective(original_lp_, obj);
+      const f_t user_lower  = compute_user_objective(original_lp_, root_objective_);
+      const std::string gap = num_fractional != 0 ? user_mip_gap<f_t>(user_obj, user_lower) : "0.0%";
+
+      settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d   %7.1e     %s %9.2f\n",
+        0,                         // Explored
+        0UL,                       // Unexplored (%lu needs an unsigned long argument)
+        user_obj,                  // Objective
+        user_lower,                // Bound
+        num_fractional,            // IntInf
+        0,                         // Depth
+        static_cast<f_t>(iter),    // Iter/Node
+        gap.c_str(),               // Gap
+        toc(exploration_stats_.start_time));
+    }
+  }
+
+  if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) {
+    settings_.log.printf("Gomory cuts   : %d\n", num_gomory_cuts);
+    settings_.log.printf("MIR cuts      : %d\n", num_mir_cuts);
+    settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts);
+    settings_.log.printf("Cut pool size : %d\n", cut_pool_size);
+    settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]);
+  }
+
+  if (edge_norms_.size() != original_lp_.num_cols)
+  {
+    edge_norms_.resize(original_lp_.num_cols, -1.0);
+  }
+  for (i_t k = 0; k < original_lp_.num_rows; k++)
+  {
+    const i_t j = basic_list[k];
+    if (edge_norms_[j] < 0.0)
+    {
+      edge_norms_[j] = 1e-4;
     }
-    return mip_status_t::OPTIMAL;
   }
 
   pc_.resize(original_lp_.num_cols);
@@ -1434,21 +2005,16 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   search_tree_.branch(&search_tree_.root,
                       branch_var,
                       root_relax_soln_.x[branch_var],
+                      num_fractional,
                       root_vstatus_,
                       original_lp_,
                       log);
 
-  csr_matrix_t<i_t, f_t> Arow(1, 1, 0);
-  original_lp_.A.to_compressed_row(Arow);
-
   settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n",
                        settings_.num_threads,
                        settings_.num_bfs_threads,
                        settings_.num_diving_threads);
 
-  settings_.log.printf(
-    " | Explored | Unexplored |    Objective    |     Bound     | Depth | Iter/Node |   Gap    "
-    "|  Time  |\n");
 
   exploration_stats_.nodes_explored       = 0;
   exploration_stats_.nodes_unexplored     = 2;
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 38438cc9e..5cd35f263 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -115,7 +115,15 @@ class branch_and_bound_t {
   bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; }
   std::atomic<int>* get_root_concurrent_halt() { return &root_concurrent_halt_; }
   void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; }
-  lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings);
+  lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings,
+                                    lp_solution_t<i_t, f_t>& root_relax_soln,
+                                    std::vector<variable_status_t>& root_vstatus,
+                                    basis_update_mpf_t<i_t, f_t>& basis_update,
+                                    std::vector<i_t>& basic_list,
+                                    std::vector<i_t>& nonbasic_list,
+                                    std::vector<f_t>& edge_norms);
+
+  void find_reduced_cost_fixings(f_t upper_bound);
 
   // The main entry routine. Returns the solver status and populates solution with the incumbent.
   mip_status_t solve(mip_solution_t<i_t, f_t>& solution);
@@ -131,10 +139,14 @@ class branch_and_bound_t {
   lp_problem_t<i_t, f_t> original_lp_;
   std::vector<i_t> new_slacks_;
   std::vector<variable_type_t> var_types_;
+  i_t num_integer_variables_;
 
   // Local lower bounds for each thread
   std::vector<omp_atomic_t<f_t>> local_lower_bounds_;
 
+  // Mutex for the original LP
+  omp_mutex_t mutex_original_lp_;
+
   // Mutex for upper bound
   omp_mutex_t mutex_upper_;
 
diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp
index 23d9a0e8e..a8937b70b 100644
--- a/cpp/src/dual_simplex/crossover.cpp
+++ b/cpp/src/dual_simplex/crossover.cpp
@@ -785,8 +785,15 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
           factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
         if (rank != m) {
           settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-          basis_repair(
-            lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+          basis_repair(lp.A,
+                       settings,
+                       lp.lower,
+                       lp.upper,
+                       deficient,
+                       slacks_needed,
+                       basic_list,
+                       nonbasic_list,
+                       vstatus);
           if (factorize_basis(
                 lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
             settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1132,7 +1139,15 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
   rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A,
+                 settings,
+                 lp.lower,
+                 lp.upper,
+                 deficient,
+                 slacks_needed,
+                 basic_list,
+                 nonbasic_list,
+                 vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1323,7 +1338,15 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
         factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
       if (rank != m) {
         settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-        basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+        basis_repair(lp.A,
+                     settings,
+                     lp.lower,
+                     lp.upper,
+                     deficient,
+                     slacks_needed,
+                     basic_list,
+                     nonbasic_list,
+                     vstatus);
         if (factorize_basis(
               lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
           settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1355,18 +1378,22 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
       settings.log.debug("Num flips %d\n", num_flips);
       solution = phase1_solution;
       print_crossover_info(lp, settings, vstatus, solution, "Dual phase 1 complete");
-      std::vector<f_t> edge_norms;
-      dual::status_t status = dual_phase2(
-        2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
-      if (toc(start_time) > settings.time_limit) {
-        settings.log.printf("Time limit exceeded\n");
-        return crossover_status_t::TIME_LIMIT;
-      }
-      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-        settings.log.printf("Concurrent halt\n");
-        return crossover_status_t::CONCURRENT_LIMIT;
+      dual_infeas = dual_infeasibility(lp, settings, vstatus, solution.z);
+      dual::status_t status = dual::status_t::NUMERICAL;
+      if (dual_infeas <= settings.dual_tol) {
+        std::vector<f_t> edge_norms;
+        status = dual_phase2(
+          2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
+        if (toc(start_time) > settings.time_limit) {
+          settings.log.printf("Time limit exceeded\n");
+          return crossover_status_t::TIME_LIMIT;
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+          settings.log.printf("Concurrent halt\n");
+          return crossover_status_t::CONCURRENT_LIMIT;
+        }
+        solution.iterations += iter;
       }
-      solution.iterations += iter;
       primal_infeas = primal_infeasibility(lp, settings, vstatus, solution.x);
       dual_infeas   = dual_infeasibility(lp, settings, vstatus, solution.z);
       primal_res    = primal_residual(lp, solution);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
new file mode 100644
index 000000000..4fd5a8299
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -0,0 +1,1703 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <dual_simplex/cuts.hpp>
+#include <dual_simplex/dense_matrix.hpp>
+
+
+namespace cuopt::linear_programming::dual_simplex {
+
+
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
+{
+  // Appends `cut` (after cut.squeeze), its rhs and its type to the pool with age 0.
+  // A cut that references an index >= original_vars_ is logged and not added.
+  // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool
+  for (const i_t col : cut.i) {
+    if (col < original_vars_) { continue; }
+    settings_.log.printf(
+      "Cut has variable %d that is greater than original_vars_ %d\n", col, original_vars_);
+    return;
+  }
+
+  // Store the squeezed form of the cut as a new row of the pool matrix
+  sparse_vector_t<i_t, f_t> squeezed;
+  cut.squeeze(squeezed);
+  cut_storage_.append_row(squeezed);
+
+  rhs_storage_.push_back(rhs);
+  cut_type_.push_back(cut_type);
+  cut_age_.push_back(0);
+}
+
+
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_distance(i_t row, const std::vector<f_t>& x, f_t& cut_violation, f_t &cut_norm)
+{
+  // Returns (rhs - cut'*x) / ||cut||_2 for pool row `row`; also outputs the violation and the norm.
+  const i_t row_end = cut_storage_.row_start[row + 1];
+  f_t cut_x = 0.0;
+  f_t dot = 0.0;
+  for (i_t p = cut_storage_.row_start[row]; p < row_end; p++) {
+    const f_t cut_coeff = cut_storage_.x[p];
+    cut_x += cut_coeff * x[cut_storage_.j[p]];
+    dot += cut_coeff * cut_coeff;
+  }
+  cut_violation = rhs_storage_[row] - cut_x;
+  cut_norm = std::sqrt(dot);
+  // A row with no nonzero coefficients has zero norm. Report distance 0 for it instead of
+  // dividing by zero, which would return inf or NaN.
+  return cut_norm > 0.0 ? cut_violation / cut_norm : 0.0;
+}
+
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_density(i_t row)
+{
+  // Density of pool row `row`: its stored nonzero count divided by the number of
+  // original variables.
+  const i_t nnz_in_row = cut_storage_.row_start[row + 1] - cut_storage_.row_start[row];
+  const i_t num_vars   = original_vars_;
+  return static_cast<f_t>(nnz_in_row) / num_vars;
+}
+
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_orthogonality(i_t i,  i_t j)
+{
+  // Returns 1 - |<cut_i, cut_j>| / (cut_norms_[i] * cut_norms_[j]) for two rows of the pool.
+  // The norms are read from cut_norms_, so it must already hold values for both rows.
+  const i_t first_begin  = cut_storage_.row_start[i];
+  const i_t first_len    = cut_storage_.row_start[i + 1] - first_begin;
+  const i_t second_begin = cut_storage_.row_start[j];
+  const i_t second_len   = cut_storage_.row_start[j + 1] - second_begin;
+
+  auto* col_idx = cut_storage_.j.data();
+  auto* coeffs  = cut_storage_.x.data();
+  const f_t inner = sparse_dot(col_idx + first_begin, coeffs + first_begin, first_len,
+                               col_idx + second_begin, coeffs + second_begin, second_len);
+
+  return 1.0 - std::abs(inner) / (cut_norms_[i] * cut_norms_[j]);
+}
+
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
+{
+  // Scores every cut in the pool against the relaxation solution x_relax and greedily fills
+  // best_cuts_ with up to max_cuts cuts. A cut's score is
+  //   weight_distance * distance + weight_orthogonality * orthogonality
+  // (0 when its distance is <= min_cut_distance), where orthogonality is the minimum of
+  // cut_orthogonality() against the cuts selected so far in this call.
+  const f_t weight_distance = 1.0;
+  const f_t weight_orthogonality = 1.0;
+  const f_t min_cut_distance = 1e-4;
+  const i_t max_cuts = 2000;
+  const f_t min_orthogonality = 0.5;
+
+  auto score = [&](i_t c) {
+    return cut_distances_[c] <= min_cut_distance
+             ? 0.0
+             : weight_distance * cut_distances_[c] + weight_orthogonality * cut_orthogonality_[c];
+  };
+  auto by_score = [&](i_t a, i_t b) { return cut_scores_[a] > cut_scores_[b]; };
+
+  cut_distances_.resize(cut_storage_.m, 0.0);
+  cut_norms_.resize(cut_storage_.m, 0.0);
+  cut_scores_.resize(cut_storage_.m, 0.0);
+  // assign (not resize): resize keeps the values left by a previous call, so orthogonality
+  // lowered against an earlier selection would leak into this scoring pass.
+  cut_orthogonality_.assign(cut_storage_.m, 1.0);
+  for (i_t i = 0; i < cut_storage_.m; i++) {
+    f_t violation;
+    cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]);
+    cut_scores_[i] = score(i);
+  }
+
+  std::vector<i_t> sorted_indices(cut_storage_.m);
+  std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
+  std::sort(sorted_indices.begin(), sorted_indices.end(), by_score);
+
+  std::vector<i_t> indices;
+  indices.reserve(sorted_indices.size());
+
+  best_cuts_.clear();
+  best_cuts_.reserve(std::min(max_cuts, cut_storage_.m));
+  scored_cuts_ = 0;
+
+  while (scored_cuts_ < max_cuts && !sorted_indices.empty()) {
+    const i_t i = sorted_indices[0];  // best remaining candidate
+    // Stop once the best remaining candidate is not violated by more than min_cut_distance
+    if (cut_distances_[i] <= min_cut_distance) { break; }
+
+    if (cut_age_[i] > 0) { settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); }
+
+    best_cuts_.push_back(i);
+    scored_cuts_++;
+
+    // Recompute the orthogonality of the remaining cuts against the cut just selected and keep
+    // only those whose orthogonality is still >= min_orthogonality
+    for (i_t k = 1; k < sorted_indices.size(); k++) {
+      const i_t j = sorted_indices[k];
+      cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j));
+      if (cut_orthogonality_[j] >= min_orthogonality) {
+        indices.push_back(j);
+        cut_scores_[j] = score(j);
+      }
+    }
+
+    sorted_indices = indices;
+    indices.clear();
+    std::sort(sorted_indices.begin(), sorted_indices.end(), by_score);
+  }
+}
+
+template <typename i_t, typename f_t>
+i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs, std::vector<cut_type_t>& best_cut_types)
+{
+  // Outputs the cuts listed in best_cuts_, each negated (cut.negate(), -rhs); returns their count.
+  best_cuts.m = 0;
+  best_cuts.n = original_vars_;
+  best_cuts.row_start.clear();
+  best_cuts.j.clear();
+  best_cuts.x.clear();
+  best_cuts.row_start.reserve(scored_cuts_ + 1);
+  best_cuts.row_start.push_back(0);
+  best_rhs.clear();  // reset like best_cuts so entry k always belongs to row k of best_cuts
+  best_cut_types.clear();
+  for (i_t i: best_cuts_) {
+    sparse_vector_t<i_t, f_t> cut(cut_storage_, i);
+    cut.negate();
+    best_cuts.append_row(cut);
+    best_rhs.push_back(-rhs_storage_[i]);
+    best_cut_types.push_back(cut_type_[i]);
+  }
+  return static_cast<i_t>(best_cuts_.size());
+}
+
+
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::age_cuts()
+{
+  // Increase the age of every cut in the pool by one.
+  // cut_age_ holds one counter per cut (add_cut pushes a 0 for each new cut).
+  for (auto& age : cut_age_) { ++age; }
+}
+
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::drop_cuts()
+{
+   // TODO: Implement this. The body is currently empty, so calling it has no effect.
+}
+
+template <typename i_t, typename f_t>
+knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types)
+{
+  // Scans the rows of Arow and records in knapsack_constraints_ every row that, ignoring the
+  // columns listed in new_slacks, has
+  //   - at least 3 stored entries (slack columns included in the count),
+  //   - only nonnegative coefficients that are integral within settings.integer_tol,
+  //   - only INTEGER variables with bounds exactly [0, 1], and
+  //   - a rhs beta that is integral within settings.integer_tol with 0 <= beta <= sum of coefficients.
+  knapsack_constraints_.reserve(lp.num_rows);
+
+  is_slack_.resize(lp.num_cols, 0);
+  for (i_t j : new_slacks) {
+    is_slack_[j] = 1;
+  }
+
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    const i_t row_start = Arow.row_start[i];
+    const i_t row_end   = Arow.row_start[i + 1];
+    if (row_end - row_start < 3) { continue; }
+    bool is_knapsack    = true;
+    f_t sum_pos         = 0.0;
+
+    for (i_t p = row_start; p < row_end; p++) {
+      const i_t j = Arow.j[p];
+      // Slack columns do not take part in the knapsack test
+      if (is_slack_[j]) { continue; }
+      const f_t aj = Arow.x[p];
+      // Any violation of the coefficient / variable requirements disqualifies the row
+      const bool fractional = std::abs(aj - std::round(aj)) > settings.integer_tol;
+      const bool non_binary =
+        var_types[j] != variable_type_t::INTEGER || lp.lower[j] != 0.0 || lp.upper[j] != 1.0;
+      if (fractional || non_binary || aj < 0.0) {
+        is_knapsack = false;
+        break;
+      }
+      sum_pos += aj;
+    }
+    if (!is_knapsack) { continue; }
+
+    const f_t beta = lp.rhs[i];
+    // Per-row diagnostics go through the solver logger at debug level rather than raw stdout
+    settings.log.debug("Knapsack constraint %d beta %e sum_pos %e\n", i, beta, sum_pos);
+    const bool integral_rhs = std::abs(beta - std::round(beta)) <= settings.integer_tol;
+    if (integral_rhs && beta >= 0.0 && beta <= sum_pos) {
+      knapsack_constraints_.push_back(i);
+    }
+  }
+
+  i_t num_knapsack_constraints = knapsack_constraints_.size();
+  settings.log.printf("Number of knapsack constraints %d\n", num_knapsack_constraints);
+}
+
+template <typename i_t, typename f_t>
+i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar,
+  i_t knapsack_row,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  // Tries to separate a cover inequality from the knapsack row `knapsack_row`,
+  //   sum_j a_j x_j <= beta,
+  // that cuts off the relaxation solution xstar.
+  //
+  // Returns 0 and fills cut / cut_rhs when a cover inequality violated by xstar is found, and
+  // -1 otherwise. The cut is stored in the form
+  //   -sum_{j in cover} x_j >= -|cover| + 1.
+  //
+  // Separation problem: choose z_j in {0, 1}, where z_j = 1 leaves item j OUT of the cover, to
+  //   maximize   sum_j (1 - xstar_j) z_j
+  //   subject to sum_j a_j z_j <= sum_j a_j - (beta + 1).
+  // The items with z_j = 0 then have total weight >= beta + 1 and therefore form a cover, and
+  // the cover inequality is violated iff sum_{j in cover} (1 - xstar_j) < 1.
+  // (The + 1 relies on the integral coefficients and rhs checked by the constructor.)
+
+  // Get the row associated with the knapsack constraint
+  sparse_vector_t<i_t, f_t> knapsack_inequality(Arow, knapsack_row);
+  const f_t knapsack_rhs = lp.rhs[knapsack_row];
+
+  // Remove the slacks from the inequality and count the remaining knapsack items
+  i_t num_items      = 0;
+  f_t seperation_rhs = 0.0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    if (is_slack_[knapsack_inequality.i[k]]) {
+      knapsack_inequality.x[k] = 0.0;
+    } else {
+      seperation_rhs += knapsack_inequality.x[k];
+      num_items++;
+    }
+  }
+  seperation_rhs -= (knapsack_rhs + 1);
+  settings.log.debug(
+    "Knapsack row %d rhs %g seperation_rhs %g\n", knapsack_row, knapsack_rhs, seperation_rhs);
+  if (num_items == 0 || seperation_rhs <= 0.0) { return -1; }
+
+  // Build the separation knapsack. The containers are sized by the number of non-slack entries
+  // (not size() - 1), so rows with zero or several slack columns cannot overrun them.
+  std::vector<f_t> values;
+  std::vector<f_t> weights;
+  values.reserve(num_items);
+  weights.reserve(num_items);
+  f_t objective_constant = 0.0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (is_slack_[j]) { continue; }
+    const f_t vj = 1.0 - xstar[j];
+    objective_constant += vj;
+    values.push_back(vj);
+    weights.push_back(knapsack_inequality.x[k]);
+  }
+  std::vector<f_t> solution(num_items, 0.0);
+
+  // solve_knapsack_problem signals failure with NaN (NaN is the only value with x != x)
+  const f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution);
+  if (objective != objective) { return -1; }
+  settings.log.debug("objective %e objective_constant %e\n", objective, objective_constant);
+
+  // seperation_value = sum_{j in cover} (1 - xstar_j)
+  const f_t seperation_value = -objective + objective_constant;
+  settings.log.debug("seperation_value %e\n", seperation_value);
+  const f_t tol = 1e-6;
+  if (seperation_value >= 1.0 - tol) { return -1; }
+
+  // Items not selected by the separation knapsack make up the cover
+  i_t cover_size = 0;
+  for (i_t k = 0; k < solution.size(); k++) {
+    if (solution[k] == 0.0) { cover_size++; }
+  }
+
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(cover_size);
+  cut.x.reserve(cover_size);
+
+  i_t h = 0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (is_slack_[j]) { continue; }
+    if (solution[h] == 0.0) {
+      cut.i.push_back(j);
+      cut.x.push_back(-1.0);
+    }
+    h++;
+  }
+  cut_rhs = -cover_size + 1;
+  cut.sort();
+
+  // The cut is in the form: - sum_{j in cover} x_j >= -cover_size + 1
+  // Which is equivalent to: sum_{j in cover} x_j <= cover_size - 1
+
+  // The cut is violated by xstar iff cut'*xstar < cut_rhs, i.e. iff violation < 0. Reject the
+  // cut unless it is violated by more than tol.
+  const f_t violation = cut.dot(xstar) - cut_rhs;
+  settings.log.debug("Knapsack cut %d violation %e < 0\n", knapsack_row, violation);
+  if (violation >= -tol) { return -1; }
+  return 0;
+}
+
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::greedy_knapsack_problem(const std::vector<f_t>& values,
+                                                             const std::vector<f_t>& weights,
+                                                             f_t rhs,
+                                                             std::vector<f_t>& solution)
+{
+  // Greedy heuristic for: maximize values'*z subject to weights'*z <= rhs, z binary.
+  // Fills solution with the 0/1 selection and returns its total value.
+  const i_t n = weights.size();
+  solution.assign(n, 0.0);
+
+  // Value / weight ratio of every item. A zero-weight item is given an explicit ratio (+inf if
+  // it has positive value, 0 otherwise) because 0/0 would produce a NaN, and a NaN key breaks
+  // the strict weak ordering std::sort requires.
+  std::vector<f_t> ratios(n);
+  for (i_t i = 0; i < n; i++) {
+    const f_t zero_weight_ratio = values[i] > 0.0 ? std::numeric_limits<f_t>::infinity() : 0.0;
+    ratios[i] = weights[i] != 0.0 ? values[i] / weights[i] : zero_weight_ratio;
+  }
+
+  // Visit the items in order of decreasing ratio
+  std::vector<i_t> perm(n);
+  std::iota(perm.begin(), perm.end(), 0);
+  std::sort(perm.begin(), perm.end(), [&](i_t i, i_t j) { return ratios[i] > ratios[j]; });
+
+  // Greedily take every item that still fits in the remaining capacity
+  f_t remaining   = rhs;
+  f_t total_value = 0.0;
+  for (i_t j : perm) {
+    if (weights[j] <= remaining) {
+      solution[j] = 1.0;
+      remaining -= weights[j];
+      total_value += values[j];
+    }
+  }
+
+  // Best single-item fallback
+  f_t best_single_value = 0.0;
+  i_t best_single_idx   = -1;
+  for (i_t j = 0; j < n; ++j) {
+    if (weights[j] <= rhs && values[j] > best_single_value) {
+      best_single_value = values[j];
+      best_single_idx   = j;
+    }
+  }
+  if (best_single_value > total_value) {
+    solution.assign(n, 0.0);
+    // best_single_idx is still -1 when the greedy total is negative and no fitting item has a
+    // positive value; the empty selection (value 0) is then returned instead of writing to [-1]
+    if (best_single_idx >= 0) { solution[best_single_idx] = 1.0; }
+    return best_single_value;
+  }
+  return total_value;
+}
+
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::solve_knapsack_problem(const std::vector<f_t>& values,
+                                                            const std::vector<f_t>& weights,
+                                                            f_t rhs,
+                                                            std::vector<f_t>& solution)
+{
+  // Solve the knapsack problem
+  //   maximize   sum_j values[j] * solution[j]
+  //   subject to sum_j weights[j] * solution[j] <= rhs,  solution[j] in {0, 1}
+  // by dynamic programming over integer (possibly scaled and rounded down) values; falls back
+  // to greedy_knapsack_problem when the scaled values sum to 0 or to max_size or more.
+  // solution is resized to the number of items and filled with the 0/1 selection.
+  // Returns best_value * scale for the DP selection (the greedy value on fallback), or NaN when
+  // there are no items or the scale factor is not positive.
+  const i_t n = weights.size();
+  if (n == 0 || values.empty()) { return std::numeric_limits<f_t>::quiet_NaN(); }
+  solution.assign(n, 0.0);
+
+  // Compute the maximum value
+  const f_t vmax = *std::max_element(values.begin(), values.end());
+
+  // Check if all the values are integers
+  bool all_integers = true;
+  const f_t integer_tol = 1e-5;
+  for (i_t j = 0; j < n; j++) {
+    if (std::abs(values[j] - std::round(values[j])) > integer_tol) {
+      all_integers = false;
+      break;
+    }
+  }
+  printf("all_integers %d\n", all_integers);
+
+  // Compute the scaling factor and the scaled integer values. Scaled values are clamped at 0:
+  // a value that is negative only by round-off would otherwise become -1 and make the DP below
+  // read column v + 1, which is past the end of the table for v == sum_value.
+  f_t scale = 1.0;
+  std::vector<i_t> scaled_values(n);
+  if (all_integers) {
+    // Round (not floor) so that e.g. 0.9999999 maps to 1
+    for (i_t j = 0; j < n; j++) {
+      scaled_values[j] = std::max<i_t>(0, static_cast<i_t>(std::round(values[j])));
+    }
+  } else {
+    const f_t epsilon = 0.1;
+    scale             = epsilon * vmax / static_cast<f_t>(n);
+    if (scale <= 0.0) { return std::numeric_limits<f_t>::quiet_NaN(); }
+    printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n);
+    for (i_t i = 0; i < n; ++i) {
+      scaled_values[i] = std::max<i_t>(0, static_cast<i_t>(std::floor(values[i] / scale)));
+    }
+  }
+
+  i_t sum_value = std::accumulate(scaled_values.begin(), scaled_values.end(), 0);
+  const i_t INT_INF = std::numeric_limits<i_t>::max() / 2;
+  printf("sum value %d\n", sum_value);
+  const i_t max_size = 10000;
+  if (sum_value <= 0.0 || sum_value >= max_size) {
+    printf("sum value %d is negative or too large using greedy solution\n", sum_value);
+    return greedy_knapsack_problem(values, weights, rhs, solution);
+  }
+
+  // dp(j, v) = minimum weight using first j items to get value v
+  // take(j, v) = 1 when that minimum is reached by taking item j-1
+  dense_matrix_t<i_t, i_t> dp(n + 1, sum_value + 1, INT_INF);
+  dense_matrix_t<i_t, uint8_t> take(n + 1, sum_value + 1, 0);
+  dp(0, 0) = 0;
+  printf("start dp\n");
+
+  // Dynamic programming. Weights are rounded to the nearest integer for the table.
+  for (i_t j = 1; j <= n; ++j) {
+    const i_t value_j  = scaled_values[j - 1];
+    const i_t weight_j = static_cast<i_t>(std::round(weights[j - 1]));
+    for (i_t v = 0; v <= sum_value; ++v) {
+      // Do not take item j-1
+      dp(j, v) = dp(j - 1, v);
+
+      // Take item j-1 if possible
+      if (v >= value_j) {
+        const i_t candidate = dp(j - 1, v - value_j) + weight_j;
+        if (candidate < dp(j, v)) {
+          dp(j, v)   = candidate;
+          take(j, v) = 1;
+        }
+      }
+    }
+  }
+
+  // Find best achievable value within capacity
+  i_t best_value = 0;
+  for (i_t v = 0; v <= sum_value; ++v) {
+    if (dp(n, v) <= rhs) { best_value = v; }
+  }
+
+  // Backtrack to recover solution (entries not taken keep the 0.0 assigned above)
+  i_t v = best_value;
+  for (i_t j = n; j >= 1; --j) {
+    if (!take(j, v)) { continue; }
+    solution[j - 1] = 1.0;
+    v -= scaled_values[j - 1];
+  }
+
+  return best_value * scale;
+}
+
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                                               csr_matrix_t<i_t, f_t>& Arow,
+                                               const std::vector<i_t>& new_slacks,
+                                               const std::vector<variable_type_t>& var_types,
+                                               basis_update_mpf_t<i_t, f_t>& basis_update,
+                                               const std::vector<f_t>& xstar,
+                                               const std::vector<i_t>& basic_list,
+                                               const std::vector<i_t>& nonbasic_list)
+{
+  // Runs the three cut generators in turn. 1) Gomory cuts, which also need the basis information
+  generate_gomory_cuts(
+    lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
+
+  // 2) Knapsack cuts
+  generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
+  // (the knapsack generator logs each row it adds or skips itself)
+
+ // 3) MIR cuts
+  generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
+}
+
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar)
+{
+  // Asks knapsack_generation_ for one cut per knapsack row; cuts reported with status 0 go to the pool
+  if (!(knapsack_generation_.num_knapsack_constraints() > 0)) { return; }
+  for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) {
+    sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+    f_t cut_rhs;
+    const i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts(
+      lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut, cut_rhs);
+    if (knapsack_status != 0) {
+      settings.log.printf("Knapsack cut %d is not violated. Skipping\n", knapsack_row);
+      continue;
+    }
+    settings.log.printf("Adding Knapsack cut %d\n", knapsack_row);
+    cut_pool_.add_cut(cut_type_t::KNAPSACK, cut, cut_rhs);
+  }
+}
+
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
+                                                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                                                   csr_matrix_t<i_t, f_t>& Arow,
+                                                   const std::vector<i_t>& new_slacks,
+                                                   const std::vector<variable_type_t>& var_types,
+                                                   const std::vector<f_t>& xstar)
+{
+  // For every row of the LP: drop the row's slack to obtain an inequality, negate it into >= form,
+  // derive a MIR cut from it and add the cut to the pool when xstar violates it by a distance of
+  // more than min_cut_distance.
+  const f_t min_cut_distance = 1e-4;
+
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
+  mir.initialize(lp, new_slacks, xstar);
+
+  // slack_map[i] = slack column of row i, or -1 when the row has none. The default must not be a
+  // valid column index: with 0, a row without a slack would get the coefficient of column 0
+  // zeroed below.
+  std::vector<i_t> slack_map(lp.num_rows, -1);
+  for (i_t slack : new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end = lp.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Generate MIR cuts: Slack %d has %d nzs in column\n", slack, col_len);
+      exit(1);
+    }
+    const i_t i = lp.A.i[col_start];
+    slack_map[i] = slack;
+  }
+
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    sparse_vector_t<i_t, f_t> inequality(Arow, i);
+    f_t inequality_rhs = lp.rhs[i];
+    const i_t slack = slack_map[i];
+
+    // Remove the slack from the equality to get an inequality
+    for (i_t k = 0; k < inequality.i.size(); k++) {
+      const i_t j = inequality.i[k];
+      if (j == slack) { inequality.x[k] = 0.0; }
+    }
+
+    // inequality'*x <= inequality_rhs
+    // But for MIR we need: inequality'*x >= inequality_rhs
+    // NOTE(review): the <= direction presumes the slack enters its row with coefficient +1, while
+    // mir.initialize only checks that the coefficient has absolute value 1 -- confirm.
+    inequality_rhs *= -1;
+    inequality.negate();
+
+    sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+    f_t cut_rhs;
+    i_t mir_status =
+      mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
+    // Nothing to add when no cut was produced or the cut has no entries
+    if (mir_status != 0) { continue; }
+    if (cut.i.size() == 0) { continue; }
+    mir.substitute_slacks(lp, Arow, cut, cut_rhs);
+    if (cut.i.size() == 0) { continue; }
+
+    // Check that the cut is violated
+    // The cut is of the form cut'*x >= cut_rhs
+    // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar
+    const f_t dot      = cut.dot(xstar);
+    const f_t cut_norm = cut.norm2_squared();
+    if (!(dot < cut_rhs && cut_norm > 0.0)) { continue; }
+
+    // Cut is violated. Compute its distance and keep the cut only when the distance exceeds
+    // min_cut_distance
+    const f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm);
+    if (cut_distance > min_cut_distance) {
+      cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
+    }
+  }
+}
+
+
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list)
+{
+  mixed_integer_gomory_base_inequality_t<i_t, f_t> gomory(lp, basis_update, nonbasic_list);
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
+  // gomory produces the base inequality of a basis row; mir turns a base inequality into a cut
+  mir.initialize(lp, new_slacks, xstar);
+  // Visit every basis row whose basic variable is INTEGER and has a fractional value in xstar
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    sparse_vector_t<i_t, f_t> inequality(lp.num_cols, 0);
+    f_t inequality_rhs;
+    const i_t j = basic_list[i];
+    if (var_types[j] != variable_type_t::INTEGER) { continue; }
+    const f_t x_j = xstar[j];
+    if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; }
+    i_t gomory_status = gomory.generate_base_inequality(lp,
+                                                        settings,
+                                                        Arow,
+                                                        var_types,
+                                                        basis_update,
+                                                        xstar,
+                                                        basic_list,
+                                                        nonbasic_list,
+                                                        i,
+                                                        inequality,
+                                                        inequality_rhs);
+    if (gomory_status == 0) {
+      // Given the base inequality, generate MIR cut A
+      sparse_vector_t<i_t, f_t> cut_A(lp.num_cols, 0);
+      f_t cut_A_rhs;
+      i_t mir_status =
+        mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_A, cut_A_rhs);
+      bool A_valid = false;
+      f_t cut_A_distance = 0.0;
+      if (mir_status == 0) {
+        if (cut_A.i.size() == 0) {
+          settings.log.printf("No coefficients in cut A\n");
+          continue;
+        }
+        mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs);
+        if (cut_A.i.size() == 0) {
+          settings.log.printf("No coefficients in cut A after substituting slacks\n");
+          A_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_A.dot(xstar);
+          f_t cut_norm = cut_A.norm2_squared();
+          if (dot >= cut_A_rhs) {
+            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            continue;
+          }
+          cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
+          A_valid        = true;
+        }
+        // NOTE(review): the continue statements above move on to the next row, so cut B is never tried for this row -- confirm intended.
+      }
+
+      // Negate the base inequality and generate MIR cut B from the negated form
+      inequality.negate();
+      inequality_rhs *= -1;
+
+      sparse_vector_t<i_t, f_t> cut_B(lp.num_cols, 0);
+      f_t cut_B_rhs;
+
+      mir_status =
+        mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_B, cut_B_rhs);
+      bool B_valid = false;
+      f_t cut_B_distance = 0.0;
+      if (mir_status == 0) {
+        if (cut_B.i.size() == 0) {
+          settings.log.printf("No coefficients in cut B\n");
+          continue;
+        }
+        mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs);
+        if (cut_B.i.size() == 0) {
+          settings.log.printf("No coefficients in cut B after substituting slacks\n");
+          B_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_B.dot(xstar);
+          f_t cut_norm = cut_B.norm2_squared();
+          if (dot >= cut_B_rhs) {
+            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            continue;
+          }
+          cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);
+          B_valid        = true;
+        }
+        // NOTE(review): the continue statements above also discard an already valid cut A for this row -- confirm intended.
+      }
+
+      if ((cut_A_distance > cut_B_distance) && A_valid) {
+        // A is valid and its distance is strictly larger than B's: add A
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs);
+      } else if (B_valid) {
+        // Otherwise add B when it is valid; if neither branch applies nothing is added
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs);
+      }
+    }
+  }
+}
+
+template <typename i_t, typename f_t>
+i_t mixed_integer_gomory_base_inequality_t<i_t, f_t>::generate_base_inequality(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list,
+  i_t i,
+  sparse_vector_t<i_t, f_t>& inequality,
+  f_t& inequality_rhs)
+{
+  // Builds the base inequality of basis row i (basic variable j = basic_list[i]); returns 0 and fills inequality / inequality_rhs, or -1 when the row is skipped.
+    const i_t j = basic_list[i];
+    if (var_types[j] != variable_type_t::INTEGER) { return -1; }
+    const f_t x_j = xstar[j];
+    if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { return -1; }
+#ifdef PRINT_CUT_INFO
+    settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
+#endif
+#ifdef PRINT_BASIS
+    for (i_t h = 0; h < basic_list.size(); h++) {
+      settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+    }
+#endif
+
+    // Solve B^T u_bar = e_i
+    sparse_vector_t<i_t, f_t> e_i(lp.num_rows, 1);
+    e_i.i[0] = i;
+    e_i.x[0] = 1.0;
+    sparse_vector_t<i_t, f_t> u_bar(lp.num_rows, 0);
+    basis_update.b_transpose_solve(e_i, u_bar);
+
+    // NOTE(review): the #ifdef'd debug blocks in this function use settings_ and b_bar, while the live code uses settings and b_bar_ -- confirm they build when enabled.
+#ifdef CHECK_B_TRANSPOSE_SOLVE
+    std::vector<f_t> u_bar_dense(lp.num_rows);
+    u_bar.to_dense(u_bar_dense);
+
+    std::vector<f_t> BTu_bar(lp.num_rows);
+    b_transpose_multiply(lp, basic_list, u_bar_dense, BTu_bar);
+    for (i_t k = 0; k < lp.num_rows; k++) {
+      if (k == i) {
+        if (std::abs(BTu_bar[k] - 1.0) > 1e-6) {
+          settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+          exit(1);
+        }
+      } else {
+        if (std::abs(BTu_bar[k]) > 1e-6) {
+          settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+          exit(1);
+        }
+      }
+    }
+#endif
+
+    // Compute a_bar = N^T u_bar
+    // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused.
+    const i_t nz_ubar = u_bar.i.size();
+    std::vector<i_t> abar_indices;
+    abar_indices.reserve(nz_ubar);
+    for (i_t k = 0; k < nz_ubar; k++) {
+      const i_t ii        = u_bar.i[k];
+      const f_t u_bar_i   = u_bar.x[k];
+      const i_t row_start = Arow.row_start[ii];
+      const i_t row_end   = Arow.row_start[ii + 1];
+      for (i_t p = row_start; p < row_end; p++) {
+        const i_t jj = Arow.j[p];
+        if (nonbasic_mark_[jj] == 1) {
+          x_workspace_[jj] += u_bar_i * Arow.x[p];
+          if (!x_mark_[jj]) {
+            x_mark_[jj] = 1;
+            abar_indices.push_back(jj);
+          }
+        }
+      }
+    }
+    // a_bar gets one entry per touched nonbasic column plus one extra slot (filled with x_j below)
+    sparse_vector_t<i_t, f_t> a_bar(lp.num_cols, abar_indices.size() + 1);
+    for (i_t k = 0; k < abar_indices.size(); k++) {
+      const i_t jj = abar_indices[k];
+      a_bar.i[k]   = jj;
+      a_bar.x[k]   = x_workspace_[jj];
+    }
+
+    // Clear the workspace
+    for (i_t jj : abar_indices) {
+      x_workspace_[jj] = 0.0;
+      x_mark_[jj]      = 0;
+    }
+    abar_indices.clear();
+
+    // We should now have the base inequality
+    // x_j + a_bar^T x_N >= b_bar_i
+    // We add x_j into a_bar so that everything is in a single sparse_vector_t
+    a_bar.i[a_bar.i.size() - 1] = j;
+    a_bar.x[a_bar.x.size() - 1] = 1.0;
+
+#ifdef CHECK_A_BAR_DENSE_DOT
+    std::vector<f_t> a_bar_dense(lp.num_cols);
+    a_bar.to_dense(a_bar_dense);
+
+    f_t a_bar_dense_dot = dot<i_t, f_t>(a_bar_dense, xstar);
+    if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) {
+      settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]);
+      settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
+      exit(1);
+    }
+#endif
+
+    // We have that x_j + a_bar^T x_N == b_bar_i
+    // So x_j + a_bar^T x_N >= b_bar_i
+    // And x_j + a_bar^T x_N <= b_bar_i
+    // Or -x_j - a_bar^T x_N >= -b_bar_i
+
+#ifdef PRINT_CUT
+    {
+      settings_.log.printf("Cut %d\n", i);
+      for (i_t k = 0; k < a_bar.i.size(); k++) {
+        const i_t jj = a_bar.i[k];
+        const f_t aj = a_bar.x[k];
+        settings_.log.printf("(%d, %e) ", jj, aj);
+      }
+      settings_.log.printf("\nEnd cut %d b_bar[%d] = %e\n", i, b_bar[i]);
+    }
+#endif
+
+    // Skip cuts that are shallow
+    const f_t shallow_tol = 1e-2;
+    if (std::abs(x_j - std::round(x_j)) < shallow_tol) {
+      // x_j is within shallow_tol of an integer
+      return -1;
+    }
+
+    const f_t f_val = b_bar_[i] - std::floor(b_bar_[i]);
+    if (f_val < 0.01 || f_val > 0.99) {
+      // The fractional part of b_bar_[i] is below 0.01 or above 0.99
+      return -1;
+    }
+
+#ifdef PRINT_BASE_INEQUALITY
+    // Print out the base inequality
+    for (i_t k = 0; k < a_bar.i.size(); k++) {
+      const i_t jj = a_bar.i[k];
+      const f_t aj = a_bar.x[k];
+      settings_.log.printf("a_bar[%d] = %e\n", k, aj);
+    }
+    settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]);
+#endif
+
+    inequality = a_bar;
+    inequality_rhs = b_bar_[i];
+
+    return 0;
+}
+
+// Prepares the MIR cut generator for the current LP relaxation.
+//
+// lp         - current LP; supplies the column count, the variable bounds and the
+//              column-compressed matrix used to find the row of each slack.
+// new_slacks - column indices of the slack variables. The first stored entry of
+//              each slack column must be +1 or -1, otherwise the process exits.
+// xstar      - current relaxation solution, indexed by column.
+//
+// On return:
+//   is_slack_[j]      is 1 for every j in new_slacks and 0 otherwise,
+//   slack_rows_[j]    is the row of the first stored entry of slack column j,
+//   has_upper_[j]     is 1 when u_j is finite and xstar_j is at least as close
+//                     to u_j as to l_j,
+//   has_lower_[j]     is 1 when the lower bound is selected instead,
+//   needs_complement_ is true if any variable has bounds other than [0, inf).
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t, f_t>& lp,
+                                                        const std::vector<i_t>& new_slacks,
+                                                        const std::vector<f_t>& xstar)
+{
+  if (lp.num_cols != num_vars_) {
+    num_vars_ = lp.num_cols;
+    x_workspace_.resize(num_vars_, 0.0);
+    x_mark_.resize(num_vars_, 0);
+  }
+
+  // The bound-selection flags depend on xstar and on the current column
+  // numbering, so they are rebuilt from scratch on every call. A flag kept from
+  // an earlier call would otherwise survive: generate_cut tests has_upper_
+  // first, so a variable once marked has_upper_ would never switch to its
+  // lower bound.
+  has_lower_.clear();
+  has_lower_.resize(num_vars_, 0);
+  has_upper_.clear();
+  has_upper_.resize(num_vars_, 0);
+
+  is_slack_.clear();
+  is_slack_.resize(num_vars_, 0);
+  slack_rows_.clear();
+  slack_rows_.resize(num_vars_, 0);
+
+  for (i_t j : new_slacks) {
+    is_slack_[j] = 1;
+    const i_t col_start = lp.A.col_start[j];
+    const i_t i = lp.A.i[col_start];
+    slack_rows_[j] = i;
+    if (std::abs(lp.A.x[col_start]) != 1.0) {
+      printf("Initialize: Slack row %d has non-unit coefficient %e for variable %d\n", i, lp.A.x[col_start], j);
+      exit(1);
+    }
+  }
+
+  needs_complement_ = false;
+  for (i_t j = 0; j < lp.num_cols; j++) {
+    if (lp.lower[j] < 0) {
+      settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]);
+      //exit(1);
+    }
+    const f_t uj = lp.upper[j];
+    const f_t lj = lp.lower[j];
+    if (uj != inf || lj != 0.0) { needs_complement_ = true; }
+    const f_t xstar_j = xstar[j];
+    if (uj < inf) {
+      // Finite upper bound: pick whichever bound is closer to xstar_j
+      if (uj - xstar_j <= xstar_j - lj) {
+        has_upper_[j] = 1;
+      } else {
+        has_lower_[j] = 1;
+      }
+      continue;
+    }
+
+    if (lj > -inf) { has_lower_[j] = 1; }
+  }
+
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Initialize: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Initialize: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+}
+
+// Builds a mixed-integer rounding cut from a base inequality.
+//
+// a, beta      - coefficients and right-hand side of the base inequality
+//                (NOTE(review): presumably in the form a'*x >= beta; confirm
+//                against the callers).
+// upper_bounds - variable upper bounds, indexed by column.
+// lower_bounds - variable lower bounds, indexed by column.
+// var_types    - variable types; INTEGER columns get the rounding function f,
+//                all other columns get h(q) = max(q, 0).
+// cut, cut_rhs - output: the cut g'*x >= cut_rhs with sorted indices.
+//                Coefficients are appended to cut.
+//
+// Uses the bound selection (has_lower_/has_upper_) and needs_complement_ set by
+// initialize(). x_workspace_ and x_mark_ are used as scratch space and are
+// reset to zero before returning.
+//
+// Returns 0 on success and -1 when the resulting cut has no coefficients.
+template <typename i_t, typename f_t>
+i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
+  const sparse_vector_t<i_t, f_t>& a,
+  f_t beta,
+  const std::vector<f_t>& upper_bounds,
+  const std::vector<f_t>& lower_bounds,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      printf("num_vars_ %d\n", num_vars_);
+      printf("x_workspace_.size() %ld\n", x_workspace_.size());
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+  // Rounding function applied to integer columns
+  auto f = [](f_t q_1, f_t q_2) -> f_t {
+    f_t q_1_hat = q_1 - std::floor(q_1);
+    f_t q_2_hat = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+  };
+
+  // Positive part, applied to continuous columns. The zero is written as
+  // f_t(0) so that std::max deduces a single type for any f_t.
+  auto h = [](f_t q) -> f_t { return std::max(q, f_t(0)); };
+
+  // Columns whose workspace entry has been touched, in first-touch order
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(a.i.size());
+  f_t R;
+  if (!needs_complement_) {
+    // All variables live in [0, inf): no bound substitution is required
+    R = (beta - std::floor(beta)) * std::ceil(beta);
+
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      f_t aj       = a.x[k];
+      if (var_types[jj] == variable_type_t::INTEGER) {
+        x_workspace_[jj] += f(aj, beta);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        x_workspace_[jj] += h(aj);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      }
+    }
+  } else {
+    // Compute r: beta shifted by the selected bound of every variable
+    f_t r = beta;
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        r -= uj * a.x[k];
+        continue;
+      }
+      if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        r -= lj * a.x[k];
+      }
+    }
+
+    // Compute R
+    R = std::ceil(r) * (r - std::floor(r));
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R -= f(-aj, r) * uj;
+        } else {
+          R -= h(-aj) * uj;
+        }
+      } else if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R += f(aj, r) * lj;
+        } else {
+          R += h(aj) * lj;
+        }
+      }
+    }
+
+    // Compute the cut coefficients
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          // Upper intersect I
+          x_workspace_[jj] -= f(-aj, r);
+          if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        } else {
+          // Upper intersect C
+          f_t h_j = h(-aj);
+          if (h_j != 0.0) {
+            x_workspace_[jj] -= h_j;
+            if (!x_mark_[jj]) {
+              x_mark_[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          }
+        }
+      } else if (var_types[jj] == variable_type_t::INTEGER) {
+        // I \ Upper
+        x_workspace_[jj] += f(aj, r);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        // C \ Upper
+        f_t h_j = h(aj);
+        if (h_j != 0.0) {
+          x_workspace_[jj] += h_j;
+          if (!x_mark_[jj]) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        }
+      }
+    }
+  }
+
+  cut.i.reserve(cut_indices.size());
+  cut.x.reserve(cut_indices.size());
+  for (i_t k = 0; k < cut_indices.size(); k++) {
+    const i_t jj = cut_indices[k];
+
+    // Check for small coefficients. A term aj*x_j may be dropped from the
+    // left-hand side of g'*x >= R provided R is reduced by an upper bound on
+    // aj*x_j, which keeps the cut valid.
+    const f_t aj = x_workspace_[jj];
+    if (std::abs(aj) < 1e-6) {
+      if (aj >= 0.0 && upper_bounds[jj] < inf) {
+        // aj >= 0 so aj*x_j <= aj*u_j. Move this to the right-hand side
+        R -= aj * upper_bounds[jj];
+        continue;
+      } else if (aj <= 0.0 && lower_bounds[jj] > -inf) {
+        // aj <= 0 so aj*x_j <= aj*l_j. The right-hand side becomes R - aj*l_j
+        R -= aj * lower_bounds[jj];
+        continue;
+      }
+      // No finite bound on the relevant side: keep the small coefficient
+    }
+    cut.i.push_back(jj);
+    cut.x.push_back(x_workspace_[jj]);
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+  // The new cut is: g'*x >= R. It is returned in this form; no sign flip to
+  // the h'*x <= b form is applied here.
+  cut.sort();
+
+  cut_rhs = R;
+
+  if (cut.i.size() == 0) {
+    //settings_.log.printf("MIR: No coefficients in cut\n");
+    return -1;
+  }
+
+  return 0;
+}
+
+// Eliminates slack variables from a cut so that it no longer references them.
+//
+// lp      - current LP; supplies the slack columns, the right-hand sides and
+//           the variable bounds.
+// Arow    - row-compressed copy of the constraint matrix, used to read the row
+//           in which each slack appears.
+// cut     - in/out: coefficients of the cut sum_k C(k)*x_k >= cut_rhs.
+// cut_rhs - in/out: right-hand side of the cut.
+//
+// Uses is_slack_ and slack_rows_ set by initialize(). If the cut contains no
+// slack it is left untouched. Otherwise it is rebuilt with sorted indices, and
+// coefficients smaller than 1e-6 in magnitude are folded into cut_rhs when the
+// relevant bound is finite. The process exits if a slack column does not have
+// exactly one entry equal to +1 or -1. x_workspace_ and x_mark_ are used as
+// scratch space and are reset to zero before returning.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                                                               csr_matrix_t<i_t, f_t>& Arow,
+                                                               sparse_vector_t<i_t, f_t>& cut,
+                                                               f_t& cut_rhs)
+{
+  // Remove slacks from the cut
+  // So that the cut is only over the original variables
+  bool found_slack = false;
+  i_t cut_nz = 0;
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(cut.i.size());
+
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j  = cut.i[k];
+    const f_t cj = cut.x[k];
+    if (is_slack_[j]) {
+      found_slack = true;
+      const i_t slack_start = lp.A.col_start[j];
+      const i_t slack_end = lp.A.col_start[j + 1];
+      const i_t slack_len = slack_end - slack_start;
+      if (slack_len != 1) {
+        printf("Slack %d has %d nzs in colum\n", j, slack_len);
+        exit(1);
+      }
+      const f_t alpha = lp.A.x[slack_start];
+      if (std::abs(alpha) != 1.0) {
+        printf("Slack %d has non-unit coefficient %e\n", j, alpha);
+        exit(1);
+      }
+
+      // Do the substitution
+      // Slack variable s_j participates in row i of the constraint matrix
+      // Row i is of the form:
+      // sum_{k != j} A(i, k) * x_k + alpha * s_j = rhs_i
+      // where alpha = +1/-1
+      // So we have that
+      // s_j = (rhs_i - sum_{k != j} A(i, k) * x_k)/alpha
+
+      // Our cut is of the form:
+      // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs
+      // So the cut becomes
+      // sum_{k != j} C(k) * x_k + C(j)/alpha * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs
+      // This is equivalent to:
+      // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j)/alpha * A(i, h) * x_h >= cut_rhs - C(j)/alpha * rhs_i
+      const i_t i         = slack_rows_[j];
+      //printf("Found slack %d in cut. lo %e up %e. Slack row %d\n", j, lp.lower[j], lp.upper[j], i);
+      cut_rhs -= cj * lp.rhs[i] / alpha;
+      const i_t row_start = Arow.row_start[i];
+      const i_t row_end   = Arow.row_start[i + 1];
+      for (i_t q = row_start; q < row_end; q++) {
+        const i_t h = Arow.j[q];
+        if (h != j) {
+          const f_t aih = Arow.x[q];
+          x_workspace_[h] -= cj * aih / alpha;
+          if (!x_mark_[h]) {
+            x_mark_[h] = 1;
+            cut_indices.push_back(h);
+            cut_nz++;
+          }
+        } else {
+            const f_t aij = Arow.x[q];
+            if (std::abs(aij)!= 1.0) {
+                printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j);
+                exit(1);
+            }
+        }
+      }
+
+    } else {
+      x_workspace_[j] += cj;
+      if (!x_mark_[j]) {
+        x_mark_[j] = 1;
+        cut_indices.push_back(j);
+        cut_nz++;
+      }
+    }
+  }
+
+  if (found_slack) {
+    //printf("Found slack. Nz increased from %d to %d: %d\n", cut.i.size(), cut_nz, cut_nz - cut.i.size());
+    cut.i.reserve(cut_nz);
+    cut.x.reserve(cut_nz);
+    cut.i.clear();
+    cut.x.clear();
+
+    for (i_t k = 0; k < cut_nz; k++) {
+      const i_t j = cut_indices[k];
+
+      // Check for small coefficients. A term aj*x_j may be dropped from the
+      // left-hand side of a >= cut provided cut_rhs is reduced by an upper
+      // bound on aj*x_j, which keeps the cut valid.
+      const f_t aj = x_workspace_[j];
+      if (std::abs(aj) < 1e-6) {
+        if (aj >= 0.0 && lp.upper[j] < inf) {
+          // aj >= 0 so aj*x_j <= aj*u_j. Move this to the right-hand side
+          cut_rhs -= aj * lp.upper[j];
+          continue;
+        } else if (aj <= 0.0 && lp.lower[j] > -inf) {
+          // aj <= 0 so aj*x_j <= aj*l_j. The right-hand side becomes
+          // cut_rhs - aj*l_j
+          cut_rhs -= aj * lp.lower[j];
+          continue;
+        }
+        // No finite bound on the relevant side: keep the small coefficient
+      }
+
+      cut.i.push_back(j);
+      cut.x.push_back(x_workspace_[j]);
+    }
+    // Sort the cut
+    cut.sort();
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+}
+
+// Appends the cuts C*x <= d to the LP as new rows and extends the basis.
+//
+// settings      - solver settings; used for logging.
+// cuts          - the cut rows C, row-compressed, over the existing columns.
+// cut_rhs       - right-hand sides d; must have cuts.m entries.
+// lp            - in/out: gains cuts.m rows and cuts.m slack columns. Each slack
+//                 has coefficient 1 in its own row, bounds [0, inf) and zero
+//                 objective.
+// new_slacks    - in/out: the indices of the added slack columns are appended.
+// solution      - not referenced by this function.
+// basis_update  - in/out: extended through append_cuts with the basic columns
+//                 of C.
+// basic_list    - in/out: grown to the new row count; the new slacks become
+//                 the basic variables of the new rows.
+// nonbasic_list - not referenced by this function.
+// vstatus       - in/out: grown to the new column count; the new slacks are
+//                 marked BASIC.
+// edge_norms    - not referenced by this function.
+//
+// Returns 0 on success and -1 when the inputs are inconsistent (size mismatch
+// or an out-of-range column or basic index). Note that the index checks run
+// after lp has already been enlarged. The process exits if append_rows fails
+// or if a slack column does not have exactly one entry.
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms)
+{
+  // Given a set of cuts: C*x <= d that are currently violated
+  // by the current solution x* (i.e. C*x* > d), this function
+  // adds the cuts into the LP and updates the basis accordingly.
+
+#ifdef CHECK_BASIS
+  {
+    csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+    basis_update.multiply_lu(Btest);
+    csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+    form_b(lp.A, basic_list, B);
+    csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+    add(Btest, B, 1.0, -1.0, Diff);
+    const f_t err = Diff.norm1();
+    settings.log.printf("Before || B - L*U || %e\n", err);
+    if (err > 1e-6) { exit(1); }
+  }
+#endif
+
+  const i_t p = cuts.m;
+  if (cut_rhs.size() != static_cast<size_t>(p)) {
+    settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
+    return -1;
+  }
+  settings.log.debug("Number of cuts %d\n", p);
+  settings.log.debug("Original lp rows %d\n", lp.num_rows);
+  settings.log.debug("Original lp cols %d\n", lp.num_cols);
+
+  // Stack the cuts below the existing rows, then convert back to columns
+  csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
+  lp.A.to_compressed_row(new_A_row);
+
+  i_t append_status = new_A_row.append_rows(cuts);
+  if (append_status != 0) {
+    settings.log.printf("append_rows error: %d\n", append_status);
+    exit(1);
+  }
+
+  csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
+  new_A_row.to_compressed_col(new_A_col);
+
+  // Add in slacks variables for the new rows
+  lp.lower.resize(lp.num_cols + p);
+  lp.upper.resize(lp.num_cols + p);
+  lp.objective.resize(lp.num_cols + p);
+  i_t nz = new_A_col.col_start[lp.num_cols];
+  new_A_col.col_start.resize(lp.num_cols + p + 1);
+  new_A_col.i.resize(nz + p);
+  new_A_col.x.resize(nz + p);
+  i_t k = lp.num_rows;
+  for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) {
+    new_A_col.col_start[j] = nz;
+    new_A_col.i[nz]        = k++;
+    new_A_col.x[nz]        = 1.0;
+    nz++;
+    lp.lower[j]     = 0.0;
+    lp.upper[j]     = inf;
+    lp.objective[j] = 0.0;
+    new_slacks.push_back(j);
+  }
+  settings.log.debug("Done adding slacks\n");
+  new_A_col.col_start[lp.num_cols + p] = nz;
+  new_A_col.n                          = lp.num_cols + p;
+
+  lp.A         = new_A_col;
+
+  // Check that all slack columns have length 1
+  for (i_t slack: new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end = lp.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len);
+      exit(1);
+    }
+  }
+
+  i_t old_rows = lp.num_rows;
+  lp.num_rows += p;
+  i_t old_cols = lp.num_cols;
+  lp.num_cols += p;
+
+  lp.rhs.resize(lp.num_rows);
+  for (i_t k = old_rows; k < old_rows + p; k++) {
+    const i_t h = k - old_rows;
+    lp.rhs[k]   = cut_rhs[h];
+  }
+  settings.log.debug("Done adding rhs\n");
+
+  // Construct C_B = C(:, basic_list)
+  std::vector<i_t> C_col_degree(lp.num_cols, 0);
+  i_t cuts_nz = cuts.row_start[p];
+  for (i_t q = 0; q < cuts_nz; q++) {
+    const i_t j = cuts.j[q];
+    // The cuts may only reference columns that existed before the new slacks
+    // were added. in_basis below has old_cols entries, so any larger index
+    // would read past its end.
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf("Add cuts: cut column %d is out of bounds. Original lp cols %d\n", j, old_cols);
+      return -1;
+    }
+    C_col_degree[j]++;
+  }
+  settings.log.debug("Done computing C_col_degree\n");
+
+  // in_basis[j] is the position of column j in basic_list, or -1 if nonbasic
+  std::vector<i_t> in_basis(old_cols, -1);
+  const i_t num_basic = static_cast<i_t>(basic_list.size());
+  i_t C_B_nz          = 0;
+  for (i_t k = 0; k < num_basic; k++) {
+    const i_t j = basic_list[k];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf(
+        "basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols);
+      return -1;
+    }
+    in_basis[j] = k;
+    // The cuts are on the original variables. So it is possible that
+    // a slack will be basic and thus not part of the cuts matrix
+    if (j < cuts.n) { C_B_nz += C_col_degree[j]; }
+  }
+  settings.log.debug("Done estimating C_B_nz\n");
+
+  csr_matrix_t<i_t, f_t> C_B(p, num_basic, C_B_nz);
+  nz = 0;
+  for (i_t i = 0; i < p; i++) {
+    C_B.row_start[i]    = nz;
+    const i_t row_start = cuts.row_start[i];
+    const i_t row_end   = cuts.row_start[i + 1];
+    for (i_t q = row_start; q < row_end; q++) {
+      const i_t j       = cuts.j[q];
+      const i_t j_basis = in_basis[j];
+      if (j_basis == -1) { continue; }
+      C_B.j[nz] = j_basis;
+      C_B.x[nz] = cuts.x[q];
+      nz++;
+    }
+  }
+  C_B.row_start[p] = nz;
+
+  if (nz != C_B_nz) {
+    settings.log.printf("Add cuts: predicted nz %d actual nz %d\n", C_B_nz, nz);
+    for (i_t i = 0; i < p; i++) {
+      const i_t row_start = cuts.row_start[i];
+      const i_t row_end = cuts.row_start[i + 1];
+      for (i_t q = row_start; q < row_end; q++) {
+        const i_t j = cuts.j[q];
+        // q indexes the entries of cuts, so the value is read from cuts.x
+        settings.log.printf("C(%d, %d) = %e\n", i, j, cuts.x[q]);
+      }
+    }
+    return -1;
+  }
+  settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz);
+
+  // Adjust the basis update to include the new cuts
+  basis_update.append_cuts(C_B);
+
+  // The slack of each new row becomes that row's basic variable
+  basic_list.resize(lp.num_rows, 0);
+  i_t h = old_cols;
+  for (i_t j = old_rows; j < lp.num_rows; j++) {
+    basic_list[j] = h++;
+  }
+
+#ifdef CHECK_BASIS
+  // Check the basis update
+  csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+  basis_update.multiply_lu(Btest);
+
+  csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+  form_b(lp.A, basic_list, B);
+
+  csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+  add(Btest, B, 1.0, -1.0, Diff);
+  const f_t err = Diff.norm1();
+  settings.log.printf("After || B - L*U || %e\n", err);
+  if (err > 1e-6) {
+    settings.log.printf("Diff matrix\n");
+    // Diff.print_matrix();
+    exit(1);
+  }
+#endif
+  // Adjust the vstatus
+  vstatus.resize(lp.num_cols);
+  for (i_t j = old_cols; j < lp.num_cols; j++) {
+    vstatus[j] = variable_status_t::BASIC;
+  }
+
+  return 0;
+}
+
+// Removes inactive cut rows, and the slack column of each, from the LP.
+//
+// A row k >= original_rows is removed when |y[k]| < 1e-10 and the row contains
+// a slack that is BASIC with value greater than 1e-3. If the row contains
+// several such slacks, the last one found is the column removed with it.
+//
+// lp            - in/out: rows and columns are deleted; objective, bounds, rhs,
+//                 num_rows and num_cols are compacted to match.
+// settings      - solver settings; used for logging and passed to the
+//                 refactorization.
+// Arow          - in/out: row-compressed copy of lp.A, rebuilt after removal.
+// new_slacks    - in/out: rebuilt with the renumbered indices of the slacks
+//                 that remain. The process exits if a listed slack column does
+//                 not have exactly one entry on input.
+// original_rows - number of rows that are never candidates for removal.
+// var_types, vstatus, x, z - in/out: per-column data, compacted.
+// y             - in/out: per-row dual values, compacted.
+// basic_list, nonbasic_list - in/out: rebuilt from vstatus in column order.
+// basis_update  - in/out: resized to the new row count and refactored.
+//
+// Nothing is modified when no cut qualifies for removal.
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csr_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update)
+{
+  std::vector<i_t> cuts_to_remove;
+  cuts_to_remove.reserve(lp.num_rows - original_rows);
+  std::vector<i_t> slacks_to_remove;
+  slacks_to_remove.reserve(lp.num_rows - original_rows);
+  const f_t dual_tol = 1e-10;
+
+  std::vector<i_t> is_slack(lp.num_cols, 0);
+  for (i_t j : new_slacks) {
+    is_slack[j] = 1;
+    // Check that slack column length is 1
+    const i_t col_start = lp.A.col_start[j];
+    const i_t col_end = lp.A.col_start[j + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Remove cuts: Slack %d has %d nzs in column\n", j, col_len);
+      exit(1);
+    }
+  }
+
+  // Select the cut rows with a (near) zero dual and a strictly positive basic slack
+  for (i_t k = original_rows; k < lp.num_rows; k++) {
+    if (std::abs(y[k]) < dual_tol) {
+      const i_t row_start = Arow.row_start[k];
+      const i_t row_end   = Arow.row_start[k + 1];
+      i_t last_slack      = -1;
+      const f_t slack_tol = 1e-3;
+      for (i_t p = row_start; p < row_end; p++) {
+        const i_t j      = Arow.j[p];
+        if (is_slack[j]) {
+          if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; }
+        }
+      }
+      if (last_slack != -1) {
+        cuts_to_remove.push_back(k);
+        slacks_to_remove.push_back(last_slack);
+      }
+    }
+  }
+
+  if (cuts_to_remove.size() > 0) {
+    //settings.log.printf("Removing %d cuts\n", cuts_to_remove.size());
+    std::vector<i_t> marked_rows(lp.num_rows, 0);
+    for (i_t i : cuts_to_remove) {
+      marked_rows[i] = 1;
+    }
+    std::vector<i_t> marked_cols(lp.num_cols, 0);
+    for (i_t j : slacks_to_remove) {
+      marked_cols[j] = 1;
+    }
+
+    // Compact the per-row data
+    std::vector<f_t> new_rhs(lp.num_rows - cuts_to_remove.size());
+    std::vector<f_t> new_solution_y(lp.num_rows - cuts_to_remove.size());
+    i_t h = 0;
+    for (i_t i = 0; i < lp.num_rows; i++) {
+      if (!marked_rows[i]) {
+        new_rhs[h]        = lp.rhs[i];
+        new_solution_y[h] = y[i];
+        h++;
+      }
+    }
+    csr_matrix_t<i_t, f_t> new_Arow(1, 1, 0);
+    Arow.remove_rows(marked_rows, new_Arow);
+    Arow = new_Arow;
+    Arow.to_compressed_col(lp.A);
+
+    // Compact the per-column data and rebuild the basic/nonbasic lists
+    std::vector<f_t> new_objective(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_lower(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_upper(lp.num_cols - slacks_to_remove.size());
+    std::vector<variable_type_t> new_var_types(lp.num_cols - slacks_to_remove.size());
+    std::vector<variable_status_t> new_vstatus(lp.num_cols - slacks_to_remove.size());
+    std::vector<i_t> new_basic_list;
+    new_basic_list.reserve(lp.num_rows - slacks_to_remove.size());
+    std::vector<i_t> new_nonbasic_list;
+    new_nonbasic_list.reserve(nonbasic_list.size());
+    std::vector<f_t> new_solution_x(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_solution_z(lp.num_cols - slacks_to_remove.size());
+    std::vector<i_t> new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0);
+    h = 0;
+    for (i_t k = 0; k < lp.num_cols; k++) {
+      if (!marked_cols[k]) {
+        new_objective[h]  = lp.objective[k];
+        new_lower[h]      = lp.lower[k];
+        new_upper[h]      = lp.upper[k];
+        new_var_types[h]  = var_types[k];
+        new_vstatus[h]    = vstatus[k];
+        new_solution_x[h] = x[k];
+        new_solution_z[h] = z[k];
+        new_is_slacks[h] = is_slack[k];
+        if (new_vstatus[h] != variable_status_t::BASIC) {
+          new_nonbasic_list.push_back(h);
+        } else {
+          new_basic_list.push_back(h);
+        }
+        h++;
+      }
+    }
+    lp.A.remove_columns(marked_cols);
+    lp.A.to_compressed_row(Arow);
+    lp.objective  = new_objective;
+    lp.lower      = new_lower;
+    lp.upper      = new_upper;
+    lp.rhs        = new_rhs;
+    var_types     = new_var_types;
+    lp.num_cols   = lp.A.n;
+    lp.num_rows   = lp.A.m;
+
+    new_slacks.clear();
+    new_slacks.reserve(lp.num_cols);
+    for (i_t j = 0; j < lp.num_cols; j++) {
+        if (new_is_slacks[j]) {
+            new_slacks.push_back(j);
+        }
+    }
+    basic_list    = new_basic_list;
+    nonbasic_list = new_nonbasic_list;
+    vstatus       = new_vstatus;
+    x             = new_solution_x;
+    y             = new_solution_y;
+    z             = new_solution_z;
+
+    // size() is a size_t; convert it so that it matches the %d conversion
+    settings.log.debug("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n",
+                        static_cast<i_t>(cuts_to_remove.size()),
+                        lp.num_rows,
+                        lp.num_cols,
+                        lp.A.col_start[lp.A.n]);
+
+    // The basis changed shape, so factor it again from scratch
+    basis_update.resize(lp.num_rows);
+    basis_update.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus);
+  }
+}
+
+
+// Explicit instantiations for i_t = int and f_t = double. They are compiled
+// only when DUAL_SIMPLEX_INSTANTIATE_DOUBLE is defined.
+// NOTE(review): knapsack_generation_t is not listed here; presumably it is
+// instantiated implicitly through cut_generation_t - confirm.
+#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
+template class cut_pool_t<int, double>;
+template class cut_generation_t<int, double>;
+template class mixed_integer_gomory_base_inequality_t<int, double>;
+template class mixed_integer_rounding_cut_t<int, double>;
+
+// Free function add_cuts
+template
+int add_cuts(const simplex_solver_settings_t<int, double>& settings,
+              const csr_matrix_t<int, double>& cuts,
+              const std::vector<double>& cut_rhs,
+              lp_problem_t<int, double>& lp,
+              std::vector<int>& new_slacks,
+              lp_solution_t<int, double>& solution,
+              basis_update_mpf_t<int, double>& basis_update,
+              std::vector<int>& basic_list,
+              std::vector<int>& nonbasic_list,
+              std::vector<variable_status_t>& vstatus,
+              std::vector<double>& edge_norms);
+
+// Free function remove_cuts
+template
+void remove_cuts<int, double>(lp_problem_t<int, double>& lp,
+                 const simplex_solver_settings_t<int, double>& settings,
+                 csr_matrix_t<int, double>& Arow,
+                 std::vector<int>& new_slacks,
+                 int original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<double>& x,
+                 std::vector<double>& y,
+                 std::vector<double>& z,
+                 std::vector<int>& basic_list,
+                 std::vector<int>& nonbasic_list,
+                 basis_update_mpf_t<int, double>& basis_update);
+#endif
+
+} // namespace cuopt::linear_programming::dual_simplex
+
+
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
new file mode 100644
index 000000000..e7014e546
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -0,0 +1,313 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+#pragma once
+
+#include <dual_simplex/basis_updates.hpp>
+#include <dual_simplex/presolve.hpp>
+#include <dual_simplex/simplex_solver_settings.hpp>
+#include <dual_simplex/sparse_vector.hpp>
+#include <dual_simplex/types.hpp>
+#include <dual_simplex/user_problem.hpp>
+
+
+#include <cmath>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+// Tag identifying the kind of a cut. Stored in one byte (int8_t).
+// print_cut_types reports one count per value.
+enum cut_type_t : int8_t {
+   MIXED_INTEGER_GOMORY = 0,
+   MIXED_INTEGER_ROUNDING  = 1,
+   KNAPSACK = 2,
+};
+
+// Logs, on a single line, how many entries of cut_types are Gomory, MIR and
+// knapsack cuts. Entries with any other value are not counted.
+template <typename i_t, typename f_t>
+void print_cut_types(const std::vector<cut_type_t>& cut_types, const simplex_solver_settings_t<i_t, f_t>& settings) {
+  i_t gomory_count   = 0;
+  i_t mir_count      = 0;
+  i_t knapsack_count = 0;
+  for (const cut_type_t type : cut_types) {
+    switch (type) {
+      case cut_type_t::MIXED_INTEGER_GOMORY: gomory_count++; break;
+      case cut_type_t::MIXED_INTEGER_ROUNDING: mir_count++; break;
+      case cut_type_t::KNAPSACK: knapsack_count++; break;
+      default: break;
+    }
+  }
+  settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d\n", gomory_count, mir_count, knapsack_count);
+}
+
+
+// Returns the smallest value of (C*x)_k - cut_rhs[k] over the rows of C.
+// Every row is expected to be a violated cut, i.e. (C*x)_k > cut_rhs[k]; the
+// process prints a message and exits as soon as a row fails that test.
+// Returns inf when C has no rows.
+template <typename i_t, typename f_t>
+f_t minimum_violation(const csr_matrix_t<i_t, f_t>& C,
+                      const std::vector<f_t>& cut_rhs,
+                      const std::vector<f_t>& x)
+{
+  // Form the product C*x through a column-compressed copy of C
+  csc_matrix_t<i_t, f_t> C_by_col(C.m, C.n, 0);
+  C.to_compressed_col(C_by_col);
+  std::vector<f_t> activity(C.m);
+  matrix_vector_multiply(C_by_col, 1.0, x, 0.0, activity);
+
+  f_t smallest = inf;
+  for (i_t row = 0; row < activity.size(); row++) {
+    const f_t lhs = activity[row];
+    const f_t rhs = cut_rhs[row];
+    if (lhs <= rhs) {
+      // Not a violated cut
+      printf("C*x <= d for cut %d. C*x %e rhs %e\n", row, lhs, rhs);
+      exit(1);
+    }
+    smallest = std::min(smallest, lhs - rhs);
+  }
+  return smallest;
+}
+
+// Container for candidate cuts. Cuts are kept as the rows of a row-compressed
+// matrix (cut_storage_) together with a right-hand side, an age and a type
+// per row. Only declarations are visible here; the member functions are
+// defined out of line.
+template <typename i_t, typename f_t>
+class cut_pool_t {
+ public:
+  // Creates an empty pool whose cut matrix has original_vars columns.
+  // The settings object is held by reference, so it must outlive the pool.
+  cut_pool_t(i_t original_vars, const simplex_solver_settings_t<i_t, f_t>& settings)
+    : original_vars_(original_vars),
+      settings_(settings),
+      cut_storage_(0, original_vars, 0),
+      rhs_storage_(0),
+      cut_age_(0),
+      cut_type_(0),
+      scored_cuts_(0)
+  {
+  }
+
+  // Add a cut in the form: cut'*x >= rhs.
+  // We expect that the cut is violated by the current relaxation xstar,
+  // i.e. cut'*xstar < rhs
+  void add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
+
+  // NOTE(review): presumably scores the stored cuts against the relaxation
+  // solution x_relax - confirm against the definition.
+  void score_cuts(std::vector<f_t>& x_relax);
+
+  // We return the cuts in the form best_cuts*x <= best_rhs
+  i_t get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs, std::vector<cut_type_t>& best_cut_types);
+
+  // NOTE(review): presumably updates cut_age_ - confirm against the definition.
+  void age_cuts();
+
+  // NOTE(review): presumably discards cuts from the pool - confirm the criterion.
+  void drop_cuts();
+
+  // Number of cuts currently stored (rows of cut_storage_)
+  i_t pool_size() const { return cut_storage_.m; }
+
+ private:
+  // Scoring helpers; row, i and j are presumably row indices into cut_storage_.
+  f_t cut_distance(i_t row, const std::vector<f_t>& x, f_t& cut_violation, f_t &cut_norm);
+  f_t cut_density(i_t row);
+  f_t cut_orthogonality(i_t i, i_t j);
+
+  // Number of columns of the cut matrix, as passed to the constructor
+  i_t original_vars_;
+  // Non-owning reference to the solver settings
+  const simplex_solver_settings_t<i_t, f_t>& settings_;
+
+  // One row per cut, with parallel per-cut arrays
+  csr_matrix_t<i_t, f_t> cut_storage_;
+  std::vector<f_t> rhs_storage_;
+  std::vector<i_t> cut_age_;
+  std::vector<cut_type_t> cut_type_;
+
+  // Scoring state; initialised to zero / empty by the constructor
+  i_t scored_cuts_;
+  std::vector<f_t> cut_distances_;
+  std::vector<f_t> cut_norms_;
+  std::vector<f_t> cut_orthogonality_;
+  std::vector<f_t> cut_scores_;
+  std::vector<i_t> best_cuts_;
+};
+
+// Generator of knapsack cuts. Only declarations are visible here; the member
+// functions are defined out of line.
+// NOTE(review): the constructor presumably fills knapsack_constraints_ with
+// the rows that qualify as knapsack constraints - confirm against the
+// definition.
+template <typename i_t, typename f_t>
+class knapsack_generation_t {
+ public:
+  knapsack_generation_t(const lp_problem_t<i_t, f_t>& lp,
+                        const simplex_solver_settings_t<i_t, f_t>& settings,
+                        csr_matrix_t<i_t, f_t>& Arow,
+                        const std::vector<i_t>& new_slacks,
+                        const std::vector<variable_type_t>& var_types);
+
+  // Attempts to build one cut from row knapsack_row at the point xstar. The
+  // result is written to cut and cut_rhs.
+  // NOTE(review): the meaning of the return value is not visible here;
+  // presumably 0 on success and nonzero otherwise - confirm.
+  i_t generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                             const simplex_solver_settings_t<i_t, f_t>& settings,
+                             csr_matrix_t<i_t, f_t>& Arow,
+                             const std::vector<i_t>& new_slacks,
+                             const std::vector<variable_type_t>& var_types,
+                             const std::vector<f_t>& xstar,
+                             i_t knapsack_row,
+                             sparse_vector_t<i_t, f_t>& cut,
+                             f_t& cut_rhs);
+
+  // Number of entries in knapsack_constraints_
+  i_t num_knapsack_constraints() const { return knapsack_constraints_.size(); }
+  // Read-only access to knapsack_constraints_
+  const std::vector<i_t>& get_knapsack_constraints() const { return knapsack_constraints_; }
+
+ private:
+  // Knapsack solvers over items with the given values and weights and the
+  // capacity rhs; the chosen items are written to solution.
+  // NOTE(review): the returned f_t is presumably the objective value - confirm.
+  f_t greedy_knapsack_problem(const std::vector<f_t>& values,
+                              const std::vector<f_t>& weights,
+                              f_t rhs,
+                              std::vector<f_t>& solution);
+  f_t solve_knapsack_problem(const std::vector<f_t>& values,
+                             const std::vector<f_t>& weights,
+                             f_t rhs,
+                             std::vector<f_t>& solution);
+
+  // NOTE(review): presumably a per-column 0/1 flag marking slack columns - confirm.
+  std::vector<i_t> is_slack_;
+  // NOTE(review): presumably row indices of the knapsack constraints - confirm.
+  std::vector<i_t> knapsack_constraints_;
+};
+
+// Front end for cut generation. Holds a reference to a cut pool and owns a
+// knapsack generator. Only declarations are visible here apart from the
+// constructor; the member functions are defined out of line.
+template <typename i_t, typename f_t>
+class cut_generation_t {
+ public:
+  // Stores a reference to cut_pool (which must outlive this object) and
+  // constructs the knapsack generator from lp, settings, Arow, new_slacks and
+  // var_types.
+  cut_generation_t(cut_pool_t<i_t, f_t>& cut_pool,
+                   const lp_problem_t<i_t, f_t>& lp,
+                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                   csr_matrix_t<i_t, f_t>& Arow,
+                   const std::vector<i_t>& new_slacks,
+                   const std::vector<variable_type_t>& var_types)
+    : cut_pool_(cut_pool), knapsack_generation_(lp, settings, Arow, new_slacks, var_types)
+  {
+  }
+
+  // Public entry point for generating cuts at the relaxation solution xstar.
+  // NOTE(review): presumably calls the three private generators below and
+  // adds the resulting cuts to cut_pool_ - confirm against the definition.
+  void generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                     const simplex_solver_settings_t<i_t, f_t>& settings,
+                     csr_matrix_t<i_t, f_t>& Arow,
+                     const std::vector<i_t>& new_slacks,
+                     const std::vector<variable_type_t>& var_types,
+                     basis_update_mpf_t<i_t, f_t>& basis_update,
+                     const std::vector<f_t>& xstar,
+                     const std::vector<i_t>& basic_list,
+                     const std::vector<i_t>& nonbasic_list);
+ private:
+
+  // Gomory cuts; the only generator that takes the basis (basis_update,
+  // basic_list, nonbasic_list)
+  void generate_gomory_cuts(const lp_problem_t<i_t, f_t>& lp,
+                            const simplex_solver_settings_t<i_t, f_t>& settings,
+                            csr_matrix_t<i_t, f_t>& Arow,
+                            const std::vector<i_t>& new_slacks,
+                            const std::vector<variable_type_t>& var_types,
+                            basis_update_mpf_t<i_t, f_t>& basis_update,
+                            const std::vector<f_t>& xstar,
+                            const std::vector<i_t>& basic_list,
+                            const std::vector<i_t>& nonbasic_list);
+
+  // Mixed-integer rounding cuts
+  void generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
+                         const simplex_solver_settings_t<i_t, f_t>& settings,
+                         csr_matrix_t<i_t, f_t>& Arow,
+                         const std::vector<i_t>& new_slacks,
+                         const std::vector<variable_type_t>& var_types,
+                         const std::vector<f_t>& xstar);
+
+  // Knapsack cuts
+  void generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                              csr_matrix_t<i_t, f_t>& Arow,
+                              const std::vector<i_t>& new_slacks,
+                              const std::vector<variable_type_t>& var_types,
+                              const std::vector<f_t>& xstar);
+  // Non-owning reference to the pool given to the constructor
+  cut_pool_t<i_t, f_t>& cut_pool_;
+  // Knapsack generator owned by this object
+  knapsack_generation_t<i_t, f_t> knapsack_generation_;
+};
+// Builds base inequalities for Gomory cuts; the constructor caches basis-dependent data.
+template <typename i_t, typename f_t>
+class mixed_integer_gomory_base_inequality_t {
+ public:
+  mixed_integer_gomory_base_inequality_t(const lp_problem_t<i_t, f_t>& lp,
+                                         basis_update_mpf_t<i_t, f_t>& basis_update,
+                                         const std::vector<i_t>& nonbasic_list)  // read-only
+    : b_bar_(lp.num_rows, 0.0),
+      nonbasic_mark_(lp.num_cols, 0),
+      x_workspace_(lp.num_cols, 0.0),
+      x_mark_(lp.num_cols, 0)
+  {
+    basis_update.b_solve(lp.rhs, b_bar_);  // presumably b_bar_ = B^{-1} * rhs -- confirm
+    for (i_t j : nonbasic_list) {
+      nonbasic_mark_[j] = 1;  // flag column j as nonbasic
+    }
+  }
+  // Generates the base inequalities: C*x == d that will be turned into cuts.
+  // NOTE(review): `i` looks like the basic row to use; outputs via the non-const refs -- confirm.
+  i_t generate_base_inequality(const lp_problem_t<i_t, f_t>& lp,
+                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                               csr_matrix_t<i_t, f_t>& Arow,
+                               const std::vector<variable_type_t>& var_types,
+                               basis_update_mpf_t<i_t, f_t>& basis_update,
+                               const std::vector<f_t>& xstar,
+                               const std::vector<i_t>& basic_list,
+                               const std::vector<i_t>& nonbasic_list,
+                               i_t i,
+                               sparse_vector_t<i_t, f_t>& inequality,
+                               f_t& inequality_rhs);
+
+ private:
+  std::vector<f_t> b_bar_;          // length lp.num_rows; filled by b_solve in the constructor
+  std::vector<i_t> nonbasic_mark_;  // length lp.num_cols; 1 for columns in nonbasic_list
+  std::vector<f_t> x_workspace_;    // length lp.num_cols, zero-initialized
+  std::vector<i_t> x_mark_;         // length lp.num_cols, zero-initialized
+};
+// Mixed-integer rounding cut helper; keeps per-variable work arrays sized to num_vars.
+template <typename i_t, typename f_t>
+class mixed_integer_rounding_cut_t {
+ public:
+  mixed_integer_rounding_cut_t(i_t num_vars, const simplex_solver_settings_t<i_t, f_t>& settings)
+    : num_vars_(num_vars),
+      settings_(settings),
+      x_workspace_(num_vars, 0.0),
+      x_mark_(num_vars, 0),
+      has_lower_(num_vars, 0),
+      has_upper_(num_vars, 0),
+      needs_complement_(false)
+  {
+  }
+  // Declaration only. Presumably fills is_slack_ / slack_rows_ from new_slacks -- confirm.
+  void initialize(const lp_problem_t<i_t, f_t>& lp,
+                  const std::vector<i_t>& new_slacks,
+                  const std::vector<f_t>& xstar);
+  // NOTE(review): (a, beta) looks like the input row/rhs, (cut, cut_rhs) the output -- confirm.
+  i_t generate_cut(const sparse_vector_t<i_t, f_t>& a,
+                   f_t beta,
+                   const std::vector<f_t>& upper_bounds,
+                   const std::vector<f_t>& lower_bounds,
+                   const std::vector<variable_type_t>& var_types,
+                   sparse_vector_t<i_t, f_t>& cut,
+                   f_t& cut_rhs);
+  // NOTE(review): appears to rewrite (cut, cut_rhs) in place using rows of Arow -- confirm.
+  void substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                         csr_matrix_t<i_t, f_t>& Arow,
+                         sparse_vector_t<i_t, f_t>& cut,
+                         f_t& cut_rhs);
+
+ private:
+  i_t num_vars_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;  // reference: settings must outlive this
+  std::vector<f_t> x_workspace_;
+  std::vector<i_t> x_mark_;
+  std::vector<i_t> has_lower_;
+  std::vector<i_t> has_upper_;
+  std::vector<i_t> is_slack_;    // not sized in the constructor; starts empty
+  std::vector<i_t> slack_rows_;  // not sized in the constructor; starts empty
+  bool needs_complement_;
+};
+// Declaration only. `cuts` and `cut_rhs` are read-only; every argument after them is mutable.
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms);
+// Declaration only. NOTE(review): original_rows looks like the pre-cut row count -- confirm.
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csr_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update);
+
+}
+
diff --git a/cpp/src/dual_simplex/dense_matrix.hpp b/cpp/src/dual_simplex/dense_matrix.hpp
index b1fc521b3..3f5287113 100644
--- a/cpp/src/dual_simplex/dense_matrix.hpp
+++ b/cpp/src/dual_simplex/dense_matrix.hpp
@@ -18,6 +18,8 @@ class dense_matrix_t {
  public:
   dense_matrix_t(i_t rows, i_t cols) : m(rows), n(cols), values(rows * cols, 0.0) {}
 
+  // Same as the (rows, cols) constructor, but passes `value` instead of 0.0 as the fill argument.
+  dense_matrix_t(i_t rows, i_t cols, f_t value) : m(rows), n(cols), values(rows * cols, value) {}
   void resize(i_t rows, i_t cols)
   {
     m = rows;
diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp
index 1d66a21f7..18ca43912 100644
--- a/cpp/src/dual_simplex/mip_node.hpp
+++ b/cpp/src/dual_simplex/mip_node.hpp
@@ -59,6 +59,7 @@ class mip_node_t {
       node_id(0),
       branch_var(-1),
       branch_dir(rounding_direction_t::NONE),
+      integer_infeasible(-1),
       vstatus(basis)
   {
     children[0] = nullptr;
@@ -71,6 +72,7 @@ class mip_node_t {
              i_t branch_variable,
              rounding_direction_t branch_direction,
              f_t branch_var_value,
+             i_t integer_inf,
              const std::vector<variable_status_t>& basis)
     : status(node_status_t::PENDING),
       lower_bound(parent_node->lower_bound),
@@ -80,8 +82,8 @@ class mip_node_t {
       branch_var(branch_variable),
       branch_dir(branch_direction),
       fractional_val(branch_var_value),
+      integer_infeasible(integer_inf),
       vstatus(basis)
-
   {
     branch_var_lower = branch_direction == rounding_direction_t::DOWN ? problem.lower[branch_var]
                                                                       : std::ceil(branch_var_value);
@@ -245,6 +247,7 @@ class mip_node_t {
   f_t branch_var_lower;
   f_t branch_var_upper;
   f_t fractional_val;
+  i_t integer_infeasible;
 
   mip_node_t<i_t, f_t>* parent;
   std::unique_ptr<mip_node_t> children[2];
@@ -296,6 +299,7 @@ class search_tree_t {
   void branch(mip_node_t<i_t, f_t>* parent_node,
               const i_t branch_var,
               const f_t fractional_val,
+              const i_t integer_infeasible,
               const std::vector<variable_status_t>& parent_vstatus,
               const lp_problem_t<i_t, f_t>& original_lp,
               logger_t& log)
@@ -308,8 +312,8 @@ class search_tree_t {
                                                              branch_var,
                                                              rounding_direction_t::DOWN,
                                                              fractional_val,
+                                                             integer_infeasible,
                                                              parent_vstatus);
-
     graphviz_edge(log,
                   parent_node,
                   down_child.get(),
@@ -323,6 +327,7 @@ class search_tree_t {
                                                            branch_var,
                                                            rounding_direction_t::UP,
                                                            fractional_val,
+                                                           integer_infeasible,
                                                            parent_vstatus);
 
     graphviz_edge(log,
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 56298ef4d..94472edaa 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -623,14 +623,17 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t<i_t, f_t>& lp,
                                            const std::vector<i_t>& basic_list,
                                            const std::vector<f_t>& x,
                                            std::vector<f_t>& squared_infeasibilities,
-                                           std::vector<i_t>& infeasibility_indices)
+                                           std::vector<i_t>& infeasibility_indices,
+                                           f_t& primal_inf)
 {
   const i_t m = lp.num_rows;
   const i_t n = lp.num_cols;
-  squared_infeasibilities.resize(n, 0.0);
+  squared_infeasibilities.resize(n);
+  std::fill(squared_infeasibilities.begin(), squared_infeasibilities.end(), 0.0);
   infeasibility_indices.reserve(n);
   infeasibility_indices.clear();
-  f_t primal_inf = 0.0;
+  f_t primal_inf_squared = 0.0;
+  primal_inf             = 0.0;
   for (i_t k = 0; k < m; ++k) {
     const i_t j            = basic_list[k];
     const f_t lower_infeas = lp.lower[j] - x[j];
@@ -640,10 +643,11 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t<i_t, f_t>& lp,
       const f_t square_infeas    = infeas * infeas;
       squared_infeasibilities[j] = square_infeas;
       infeasibility_indices.push_back(j);
-      primal_inf += square_infeas;
+      primal_inf_squared += square_infeas;
+      primal_inf += infeas;
     }
   }
-  return primal_inf;
+  return primal_inf_squared;
 }
 
 template <typename i_t, typename f_t>
@@ -1227,8 +1231,8 @@ i_t initialize_steepest_edge_norms(const lp_problem_t<i_t, f_t>& lp,
       last_log = tic();
       settings.log.printf("Initialized %d of %d steepest edge norms in %.2fs\n", k, m, now);
     }
-    if (toc(start_time) > settings.time_limit) { return -1; }
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return -1; }
+    if (toc(start_time) > settings.time_limit) { printf("initialize_steepest_edge time limit\n"); return -1; }
+    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { printf("initialize_steepest_edge concurrent_halt\n"); return -1; }
   }
   return 0;
 }
@@ -1729,6 +1733,74 @@ f_t dual_infeasibility(const lp_problem_t<i_t, f_t>& lp,
   return sum_infeasible;
 }
 
+// Sums bound violations of x, split by basic/nonbasic status; returns the overall total.
+template <typename i_t, typename f_t>
+f_t primal_infeasibility_breakdown(const lp_problem_t<i_t, f_t>& lp,
+                                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                                   const std::vector<variable_status_t>& vstatus,
+                                   const std::vector<f_t>& x,
+                                   f_t& basic_infeas,     // out: violations of BASIC variables
+                                   f_t& nonbasic_infeas,  // out: violations of all other variables
+                                   f_t& basic_over)       // out: BASIC violations > primal_tol
+{
+  const i_t n    = lp.num_cols;
+  f_t primal_inf = 0;
+  basic_infeas = 0.0;
+  basic_over = 0.0;
+  nonbasic_infeas = 0.0;
+  for (i_t j = 0; j < n; ++j) {
+    if (x[j] < lp.lower[j]) {
+      // Below the lower bound: x_j < l_j => -x_j > -l_j => -x_j + l_j > 0
+      const f_t infeas = -x[j] + lp.lower[j];
+      if (vstatus[j] == variable_status_t::BASIC) {
+        basic_infeas += infeas;
+        if (infeas > settings.primal_tol) {
+          basic_over += infeas;
+        }
+      } else {
+        nonbasic_infeas += infeas;
+      }
+      primal_inf += infeas;  // the total counts every violation, regardless of tolerance
+#ifdef PRIMAL_INFEASIBLE_DEBUG
+      if (infeas > settings.primal_tol) {
+        settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n",
+                            j,
+                            infeas,
+                            lp.lower[j],
+                            x[j],
+                            lp.upper[j],
+                            static_cast<int>(vstatus[j]));
+      }
+#endif
+    }
+    if (x[j] > lp.upper[j]) {
+      // Above the upper bound: x_j > u_j => x_j - u_j > 0
+      const f_t infeas = x[j] - lp.upper[j];
+      if (vstatus[j] == variable_status_t::BASIC) {
+        basic_infeas += infeas;
+        if (infeas > settings.primal_tol) {
+          basic_over += infeas;
+        }
+      } else {
+        nonbasic_infeas += infeas;
+      }
+      primal_inf += infeas;  // the total counts every violation, regardless of tolerance
+#ifdef PRIMAL_INFEASIBLE_DEBUG
+      if (infeas > settings.primal_tol) {
+        settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n",
+                            j,
+                            infeas,
+                            lp.lower[j],
+                            x[j],
+                            lp.upper[j],
+                            static_cast<int>(vstatus[j]));
+      }
+#endif
+    }
+  }
+  return primal_inf;
+}
+
 template <typename i_t, typename f_t>
 f_t primal_infeasibility(const lp_problem_t<i_t, f_t>& lp,
                          const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -2014,7 +2086,9 @@ f_t amount_of_perturbation(const lp_problem_t<i_t, f_t>& lp, const std::vector<f
 }
 
 template <typename i_t, typename f_t>
-void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
+void prepare_optimality(i_t info,
+                        f_t orig_primal_infeas,
+                        const lp_problem_t<i_t, f_t>& lp,
                         const simplex_solver_settings_t<i_t, f_t>& settings,
                         basis_update_mpf_t<i_t, f_t>& ft,
                         const std::vector<f_t>& objective,
@@ -2036,6 +2110,7 @@ void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
   sol.objective      = compute_objective(lp, sol.x);
   sol.user_objective = compute_user_objective(lp, sol.objective);
   f_t perturbation   = phase2::amount_of_perturbation(lp, objective);
+  f_t orig_perturbation = perturbation;
   if (perturbation > 1e-6 && phase == 2) {
     // Try to remove perturbation
     std::vector<f_t> unperturbed_y(m);
@@ -2081,6 +2156,23 @@ void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
       settings.log.printf("\n");
     }
   }
+
+  if (primal_infeas > 10.0*settings.primal_tol)
+  {
+    f_t basic_infeas = 0.0;
+    f_t nonbasic_infeas = 0.0;
+    f_t basic_over = 0.0;
+    phase2::primal_infeasibility_breakdown(lp, settings, vstatus, x, basic_infeas, nonbasic_infeas, basic_over);
+    printf("Primal infeasibility %e/%e (Basic %e, Nonbasic %e, Basic over %e). Perturbation %e/%e. Info %d\n",
+           primal_infeas,
+           orig_primal_infeas,
+           basic_infeas,
+           nonbasic_infeas,
+           basic_over,
+           orig_perturbation,
+           perturbation,
+           info);
+  }
 }
 
 template <typename i_t, typename f_t>
@@ -2241,7 +2333,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     assert(superbasic_list.size() == 0);
     assert(nonbasic_list.size() == n - m);
 
-    if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) {
+    if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) >
+        0) {
       return dual::status_t::NUMERICAL;
     }
 
@@ -2268,7 +2361,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
 #ifdef COMPUTE_DUAL_RESIDUAL
   std::vector<f_t> dual_res1;
-  compute_dual_residual(lp.A, objective, y, z, dual_res1);
+  phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1);
   f_t dual_res_norm = vector_norm_inf<i_t, f_t>(dual_res1);
   if (dual_res_norm > settings.tight_tol) {
     settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm);
@@ -2322,10 +2415,27 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
       std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1);
       if (phase2::initialize_steepest_edge_norms(
             lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) {
+        printf("Bad return from initialize steepest edge norms\n");
         return dual::status_t::TIME_LIMIT;
       }
     }
   } else {
+
+    // Check that none of the basic variables have a steepest edge that is nonpositive
+    for (i_t k = 0; k < m; k++)
+    {
+      const i_t j = basic_list[k];
+      bool fix_needed = false;
+      if (delta_y_steepest_edge[j] <= 0.0)
+      {
+        fix_needed = true;
+        //printf("Basic variable %d has a nonpositive steepest edge %e\n", j, delta_y_steepest_edge[j]);
+        delta_y_steepest_edge[j] = 1e-4;
+      }
+      if (fix_needed) {
+        //printf("Basic variable had nonpositive steepest edge\n");
+      }
+    }
     settings.log.printf("using exisiting steepest edge %e\n",
                         vector_norm2<i_t, f_t>(delta_y_steepest_edge));
   }
@@ -2357,8 +2467,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
   std::vector<uint8_t> bounded_variables(n, 0);
   phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables);
 
-  f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-    lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+  f_t primal_infeasibility;
+  f_t primal_infeasibility_squared =
+    phase2::compute_initial_primal_infeasibilities(lp,
+                                                   settings,
+                                                   basic_list,
+                                                   x,
+                                                   squared_infeasibilities,
+                                                   infeasibility_indices,
+                                                   primal_infeasibility);
 
 #ifdef CHECK_BASIC_INFEASIBILITIES
   phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0);
@@ -2399,7 +2516,60 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     }
     timers.pricing_time += timers.stop_timer();
     if (leaving_index == -1) {
-      phase2::prepare_optimality(lp,
+
+
+#ifdef CHECK_BASIS_UPDATE
+      for (i_t k = 0; k < basic_list.size(); k++) {
+        const i_t jj = basic_list[k];
+        sparse_vector_t<i_t, f_t> ei_sparse(m, 1);
+        ei_sparse.i[0] = k;
+        ei_sparse.x[0] = 1.0;
+        sparse_vector_t<i_t, f_t> ubar_sparse(m, 0);
+        ft.b_transpose_solve(ei_sparse, ubar_sparse);
+        std::vector<f_t> ubar_dense(m);
+        ubar_sparse.to_dense(ubar_dense);
+        std::vector<f_t> BTu_dense(m);
+        b_transpose_multiply(lp, basic_list, ubar_dense, BTu_dense);
+        for (i_t l = 0; l < m; l++) {
+          if (l != k) {
+              settings.log.printf("BTu_dense[%d] = %e i %d\n", l, BTu_dense[l], k);
+          } else {
+              settings.log.printf("BTu_dense[%d] = %e != 1.0 i %d\n", l, BTu_dense[l], k);
+          }
+        }
+        for (i_t h = 0; h < m; h++) {
+          settings.log.printf("i %d ubar_dense[%d] = %.16e\n", k, h, ubar_dense[h]);
+        }
+      }
+      settings.log.printf("ft.num_updates() %d\n", ft.num_updates());
+      for (i_t h = 0; h < m; h++) {
+        settings.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+      }
+
+#endif
+
+      primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
+        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
+      if (primal_infeasibility > settings.primal_tol) {
+
+        const i_t nz      = infeasibility_indices.size();
+        for (i_t k = 0; k < nz; ++k) {
+          const i_t j              = infeasibility_indices[k];
+          const f_t squared_infeas = squared_infeasibilities[j];
+          const f_t val            = squared_infeas / delta_y_steepest_edge[j];
+          if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) {
+            printf("Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n", iter, j, val, squared_infeas, delta_y_steepest_edge[j]);
+            //delta_y_steepest_edge[j] = 1e-4;
+          }
+        }
+
+        //printf("No leaving variable. Updated primal infeasibility: %e\n", primal_infeasibility);
+        //continue;
+      }
+
+      phase2::prepare_optimality(0,
+                                 primal_infeasibility,
+                                 lp,
                                  settings,
                                  ft,
                                  objective,
@@ -2556,16 +2726,24 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             std::vector<f_t> unperturbed_x(n);
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
-            x                    = unperturbed_x;
-            primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+            x = unperturbed_x;
+            primal_infeasibility_squared =
+              phase2::compute_initial_primal_infeasibilities(lp,
+                                                             settings,
+                                                             basic_list,
+                                                             x,
+                                                             squared_infeasibilities,
+                                                             infeasibility_indices,
+                                                             primal_infeasibility);
             settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility);
 
             objective = lp.objective;
             // Need to reset the objective value, since we have recomputed x
             obj = phase2::compute_perturbed_objective(objective, x);
             if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(1,
+                                         primal_infeasibility,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
@@ -2593,16 +2771,24 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             std::vector<f_t> unperturbed_x(n);
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
-            x                    = unperturbed_x;
-            primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+            x = unperturbed_x;
+            primal_infeasibility_squared =
+              phase2::compute_initial_primal_infeasibilities(lp,
+                                                             settings,
+                                                             basic_list,
+                                                             x,
+                                                             squared_infeasibilities,
+                                                             infeasibility_indices,
+                                                             primal_infeasibility);
 
             const f_t orig_dual_infeas = phase2::dual_infeasibility(
               lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol);
 
             if (primal_infeasibility <= settings.primal_tol &&
                 orig_dual_infeas <= settings.dual_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(2,
+                                         primal_infeasibility,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
@@ -2810,7 +2996,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                           delta_xB_0_sparse.i,
                                           squared_infeasibilities,
                                           infeasibility_indices,
-                                          primal_infeasibility);
+                                          primal_infeasibility_squared);
     // Update primal infeasibilities due to changes in basic variables
     // from the leaving and entering variables
     phase2::update_primal_infeasibilities(lp,
@@ -2822,7 +3008,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                           scaled_delta_xB_sparse.i,
                                           squared_infeasibilities,
                                           infeasibility_indices,
-                                          primal_infeasibility);
+                                          primal_infeasibility_squared);
     // Update the entering variable
     phase2::update_single_primal_infeasibility(lp.lower,
                                                lp.upper,
@@ -2883,14 +3069,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 #endif
     if (should_refactor) {
       bool should_recompute_x = false;
-      if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) {
+      if (ft.refactor_basis(
+            lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
         should_recompute_x = true;
         settings.log.printf("Failed to factorize basis. Iteration %d\n", iter);
         if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; }
         i_t count = 0;
         i_t deficient_size;
-        while ((deficient_size =
-                  ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus)) > 0) {
+        while ((deficient_size = ft.refactor_basis(
+                  lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) {
           settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n",
                               iter,
                               static_cast<int>(deficient_size));
@@ -2912,8 +3099,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
           lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
         x = unperturbed_x;
       }
-      phase2::compute_initial_primal_infeasibilities(
-        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+      primal_infeasibility_squared =
+        phase2::compute_initial_primal_infeasibilities(lp,
+                                                       settings,
+                                                       basic_list,
+                                                       x,
+                                                       squared_infeasibilities,
+                                                       infeasibility_indices,
+                                                       primal_infeasibility);
     }
 #ifdef CHECK_BASIC_INFEASIBILITIES
     phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7);
@@ -2951,7 +3144,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                           iter,
                           compute_user_objective(lp, obj),
                           infeasibility_indices.size(),
-                          primal_infeasibility,
+                          primal_infeasibility_squared,
                           sum_perturb,
                           now);
     }
diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp
index 80406dcf0..3d9849fbe 100644
--- a/cpp/src/dual_simplex/primal.cpp
+++ b/cpp/src/dual_simplex/primal.cpp
@@ -298,7 +298,15 @@ primal::status_t primal_phase2(i_t phase,
     factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A,
+                 settings,
+                 lp.lower,
+                 lp.upper,
+                 deficient,
+                 slacks_needed,
+                 basic_list,
+                 nonbasic_list,
+                 vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp
index 9f84e108d..f391598b1 100644
--- a/cpp/src/dual_simplex/pseudo_costs.cpp
+++ b/cpp/src/dual_simplex/pseudo_costs.cpp
@@ -133,6 +133,39 @@ void strong_branch_helper(i_t start,
   }
 }
 
+template <typename i_t, typename f_t>
+f_t trial_branching(const lp_problem_t<i_t, f_t>& original_lp,
+                    const simplex_solver_settings_t<i_t, f_t>& settings,
+                    const std::vector<variable_type_t>& var_types,
+                    const std::vector<variable_status_t>& root_vstatus,
+                    const std::vector<f_t>& edge_norms,
+                    i_t branch_var,
+                    f_t branch_var_lower,
+                    f_t branch_var_upper,
+                    i_t& iter)  // forwarded by reference to dual_phase2
+{  // Re-solves a copy of original_lp with branch_var's bounds replaced by the given pair.
+  lp_problem_t child_problem      = original_lp;
+  child_problem.lower[branch_var] = branch_var_lower;
+  child_problem.upper[branch_var] = branch_var_upper;
+
+  simplex_solver_settings_t<i_t, f_t> child_settings = settings;
+  child_settings.set_log(false);
+  f_t lp_start_time              = tic();
+  child_settings.iteration_limit = 200;  // fixed iteration limit for the trial solve
+  lp_solution_t<i_t, f_t> solution(original_lp.num_rows, original_lp.num_cols);
+  std::vector<variable_status_t> vstatus = root_vstatus;  // local copy handed to dual_phase2
+  std::vector<f_t> child_edge_norms      = edge_norms;    // local copy handed to dual_phase2
+  dual::status_t status                  = dual_phase2(
+    2, 0, lp_start_time, child_problem, child_settings, vstatus, solution, iter, child_edge_norms);
+  //printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x));
+  // These three statuses yield an objective value; any other status is reported as NaN.
+  if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) {
+    return compute_objective(child_problem, solution.x);
+  } else {
+    return std::numeric_limits<f_t>::quiet_NaN();
+  }
+}
+
 }  // namespace
 
 template <typename i_t, typename f_t>
@@ -317,6 +350,119 @@ i_t pseudo_costs_t<i_t, f_t>::variable_selection(const std::vector<i_t>& fractio
   return branch_var;
 }
 
+// Picks a branching variable by pseudo-cost score. Each candidate direction with fewer than
+// `reliable_threshold` recorded observations is first trial-solved with trial_branching(), and a
+// non-NaN result is added to the shared pseudo-cost sums/counts under `mutex`; a direction whose
+// trial solve returns NaN keeps a pseudo cost of 0. `vstatus` and `edge_norms` are forwarded to
+// the trial solves. Returns the entry of `fractional` maximizing
+// max(f_down * pc_down, eps) * max(f_up * pc_up, eps), or -1 when `fractional` is empty.
+template <typename i_t, typename f_t>
+i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t, f_t>& lp,
+                                                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                                                 const std::vector<variable_type_t>& var_types,
+                                                 const std::vector<variable_status_t>& vstatus,
+                                                 const std::vector<f_t>& edge_norms,
+                                                 const std::vector<i_t>& fractional,
+                                                 const std::vector<f_t>& solution,
+                                                 f_t current_obj,
+                                                 logger_t& log)
+{
+  const i_t num_fractional = fractional.size();
+  if (num_fractional == 0) { return -1; }  // nothing to branch on
+
+  std::vector<f_t> pseudo_cost_up(num_fractional);
+  std::vector<f_t> pseudo_cost_down(num_fractional);
+  std::vector<f_t> score(num_fractional);
+
+  i_t num_initialized_down;
+  i_t num_initialized_up;
+  f_t pseudo_cost_down_avg;
+  f_t pseudo_cost_up_avg;
+
+  mutex.lock();
+  initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
+  mutex.unlock();
+
+  log.printf("PC: num initialized down %d up %d avg down %e up %e\n",
+             num_initialized_down,
+             num_initialized_up,
+             pseudo_cost_down_avg,
+             pseudo_cost_up_avg);
+
+  const i_t reliable_threshold = 1;  // observations needed before a direction skips the trial
+  i_t iter                     = 0;  // simplex iterations summed over all trial solves
+  i_t trial_branches           = 0;  // number of trial solves performed
+
+  for (i_t k = 0; k < num_fractional; k++) {
+    const i_t j        = fractional[k];
+    const f_t floor_xj = std::floor(solution[j]);
+    const f_t ceil_xj  = std::ceil(solution[j]);
+    const f_t f_down   = solution[j] - floor_xj;
+    const f_t f_up     = ceil_xj - solution[j];
+
+    // Down branch: upper bound of j becomes floor(x_j). Shared arrays are only touched under mutex.
+    mutex.lock();
+    const bool down_reliable = pseudo_cost_num_down[j] >= reliable_threshold;
+    if (down_reliable) { pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; }
+    mutex.unlock();
+    if (!down_reliable) {
+      i_t trial_iter = 0;
+      const f_t obj  = trial_branching(
+        lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], floor_xj, trial_iter);
+      trial_branches++;
+      iter += trial_iter;
+      if (!std::isnan(obj)) {
+        mutex.lock();
+        pseudo_cost_sum_down[j] += (obj - current_obj) / f_down;
+        pseudo_cost_num_down[j]++;
+        // Read the average back before releasing the lock, matching the up branch below.
+        pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
+        mutex.unlock();
+      }
+    }
+
+    // Up branch: lower bound of j becomes ceil(x_j).
+    mutex.lock();
+    const bool up_reliable = pseudo_cost_num_up[j] >= reliable_threshold;
+    if (up_reliable) { pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; }
+    mutex.unlock();
+    if (!up_reliable) {
+      i_t trial_iter = 0;
+      const f_t obj  = trial_branching(
+        lp, settings, var_types, vstatus, edge_norms, j, ceil_xj, lp.upper[j], trial_iter);
+      trial_branches++;
+      iter += trial_iter;
+      if (!std::isnan(obj)) {
+        mutex.lock();
+        pseudo_cost_sum_up[j] += (obj - current_obj) / f_up;
+        pseudo_cost_num_up[j]++;
+        pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j];
+        mutex.unlock();
+      }
+    }
+
+    constexpr f_t eps = 1e-6;
+    score[k] =
+      std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps);
+  }
+
+  i_t select    = 0;  // stays a valid index even if no score compares greater than -1 (e.g. NaN)
+  f_t max_score = -1;
+  for (i_t k = 0; k < num_fractional; k++) {
+    if (score[k] > max_score) {
+      max_score = score[k];
+      select    = k;
+    }
+  }
+  const i_t branch_var = fractional[select];
+
+  log.printf(
+    "pc reliability branching on %d. Value %e. Score %e. Iter %d. Trial branches %d\n",
+    branch_var, solution[branch_var], score[select], iter, trial_branches);
+
+  return branch_var;
+}
+
 template <typename i_t, typename f_t>
 void pseudo_costs_t<i_t, f_t>::update_pseudo_costs_from_strong_branching(
   const std::vector<i_t>& fractional, const std::vector<f_t>& root_soln)
diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp
index 799cdc3ff..20b2198e4 100644
--- a/cpp/src/dual_simplex/pseudo_costs.hpp
+++ b/cpp/src/dual_simplex/pseudo_costs.hpp
@@ -47,6 +47,16 @@ class pseudo_costs_t {
                          const std::vector<f_t>& solution,
                          logger_t& log);
 
+  i_t reliable_variable_selection(const lp_problem_t<i_t, f_t>& lp,
+                                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                                  const std::vector<variable_type_t>& var_types,
+                                  const std::vector<variable_status_t>& vstatus,
+                                  const std::vector<f_t>& edge_norms,
+                                  const std::vector<i_t>& fractional,
+                                  const std::vector<f_t>& solution,
+                                  f_t current_obj,
+                                  logger_t& log);
+
   void update_pseudo_costs_from_strong_branching(const std::vector<i_t>& fractional,
                                                  const std::vector<f_t>& root_soln);
   std::vector<f_t> pseudo_cost_sum_up;
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index a1cc049e7..7dbf0e1cc 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -72,8 +72,10 @@ struct simplex_solver_settings_t {
       num_threads(omp_get_max_threads() - 1),
       num_bfs_threads(std::min(num_threads / 4, 1)),
       num_diving_threads(std::min(num_threads - num_bfs_threads, 1)),
+      max_cut_passes(10),
       random_seed(0),
       inside_mip(0),
+      reliability_branching(-1),
       solution_callback(nullptr),
       heuristic_preemption_callback(nullptr),
       concurrent_halt(nullptr)
@@ -139,7 +141,9 @@ struct simplex_solver_settings_t {
   i_t random_seed;                 // random seed
   i_t num_bfs_threads;             // number of threads dedicated to the best-first search
   i_t num_diving_threads;          // number of threads dedicated to diving
+  i_t max_cut_passes;              // number of cut passes to make
   i_t inside_mip;  // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node
+  i_t reliability_branching;      // -1 automatic, 0 to disable, >0 to enable reliability branching
   std::function<void(std::vector<f_t>&, f_t)> solution_callback;
   std::function<void(const std::vector<f_t>&, f_t)> node_processed_callback;
   std::function<void()> heuristic_preemption_callback;
diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp
index d1d745cbd..d882e21e2 100644
--- a/cpp/src/dual_simplex/solution.hpp
+++ b/cpp/src/dual_simplex/solution.hpp
@@ -39,7 +39,7 @@ class lp_solution_t {
   std::vector<f_t> x;
   // Dual solution vector. Lagrange multipliers for equality constraints.
   std::vector<f_t> y;
-  // Dual solution vector. Lagrange multipliers for inequality constraints.
+  // Reduced costs
   std::vector<f_t> z;
   f_t objective;
   f_t user_objective;
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 1f31a757d..ea5d197a8 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -8,6 +8,7 @@
 #include <dual_simplex/solve.hpp>
 
 #include <dual_simplex/barrier.hpp>
+#include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/branch_and_bound.hpp>
 #include <dual_simplex/crossover.hpp>
 #include <dual_simplex/initial_basis.hpp>
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 7edc7b1eb..07d44f2e9 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -363,6 +363,75 @@ i_t csc_matrix_t<i_t, f_t>::remove_row(i_t row)
   return 0;
 }
 
+// Appends every row of C below the existing rows of this matrix.
+// Returns -1 without modifying the matrix when C has more columns than this matrix (C.n > n),
+// and 0 otherwise.
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
+{
+  const i_t num_cols = this->n;
+  if (C.n > num_cols) {
+    printf("append_rows error: C.n %d n %d\n", C.n, num_cols);
+    return -1;
+  }
+
+  const i_t rows_before = this->m;
+  const i_t rows_added  = C.m;
+  const i_t nnz_before  = this->row_start[rows_before];
+  const i_t nnz_added   = C.row_start[rows_added];
+  const i_t rows_after  = rows_before + rows_added;
+  const i_t nnz_after   = nnz_before + nnz_added;
+
+  // Size the storage for the combined matrix before writing the new rows.
+  this->j.resize(nnz_after);
+  this->x.resize(nnz_after);
+  this->row_start.resize(rows_after + 1);
+
+  // Row pointers: accumulate C's per-row counts starting from the old end of this matrix.
+  i_t running = nnz_before;
+  for (i_t r = 0; r < rows_added; r++) {
+    this->row_start[rows_before + r] = running;
+    running += C.row_start[r + 1] - C.row_start[r];
+  }
+  this->row_start[rows_after] = running;
+
+  // Column indices and values of C are copied in order behind the existing entries.
+  for (i_t q = 0; q < nnz_added; q++) {
+    this->j[nnz_before + q] = C.j[q];
+    this->x[nnz_before + q] = C.x[q];
+  }
+
+  this->m      = rows_after;
+  this->nz_max = nnz_after;
+  return 0;
+}
+
+// Appends the sparse vector c as one new row at the bottom of this matrix.
+// Always returns 0; the indices stored in c are not validated here.
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_row(const sparse_vector_t<i_t, f_t>& c)
+{
+  const i_t last_row   = this->m;  // index the new row receives
+  const i_t nnz_before = this->row_start[last_row];
+  const i_t nnz_added  = c.i.size();
+  const i_t nnz_after  = nnz_before + nnz_added;
+
+  this->j.resize(nnz_after);
+  this->x.resize(nnz_after);
+  this->row_start.resize(last_row + 2);
+  this->row_start[last_row + 1] = nnz_after;
+
+  // Entries are stored in the order they appear in c.
+  for (i_t k = 0; k < nnz_added; k++) {
+    this->j[nnz_before + k] = c.i[k];
+    this->x[nnz_before + k] = c.x[k];
+  }
+
+  this->m      = last_row + 1;
+  this->nz_max = nnz_after;
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 void csc_matrix_t<i_t, f_t>::print_matrix(FILE* fid) const
 {
@@ -505,6 +574,10 @@ i_t csc_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
 #ifdef CHECK_MATRIX
   std::vector<i_t> row_marker(this->m, -1);
   for (i_t j = 0; j < this->n; ++j) {
+    if (static_cast<size_t>(j) + 1 >= this->col_start.size()) {  // col_start[j + 1] is read below
+      printf("Col start too small size %zu n %d\n", this->col_start.size(), this->n);
+      return -1;
+    }
     const i_t col_start = this->col_start[j];
     const i_t col_end   = this->col_start[j + 1];
     if (col_start > col_end || col_start > this->col_start[this->n]) {
@@ -567,6 +640,7 @@ void csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
     const i_t row_end   = this->row_start[i + 1];
     for (i_t p = row_start; p < row_end; ++p) {
       const i_t j = this->j[p];
+      if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); continue; }  // a bad j must not index col_marker
       if (col_marker[j] == i) {
         printf("CSR Error (%s) : repeated column index %d in row %d\n", matrix_name.c_str(), j, i);
       }
diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp
index 9ae8ea80b..7be647270 100644
--- a/cpp/src/dual_simplex/sparse_matrix.hpp
+++ b/cpp/src/dual_simplex/sparse_matrix.hpp
@@ -151,6 +151,12 @@ class csr_matrix_t {
   // Create a new matrix with the marked rows removed
   i_t remove_rows(std::vector<i_t>& row_marker, csr_matrix_t<i_t, f_t>& Aout) const;
 
+  // Append rows from another CSR matrix
+  i_t append_rows(const csr_matrix_t<i_t, f_t>& C);
+
+  // Append a row from a sparse vector
+  i_t append_row(const sparse_vector_t<i_t, f_t>& c);
+
   // Ensures no repeated column indices within a row
   void check_matrix(std::string matrix_name = "") const;
 
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index 2d4745650..a8bd06afa 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -28,6 +28,21 @@ sparse_vector_t<i_t, f_t>::sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t
   }
 }
 
+// Builds the sparse vector holding row `row` of the CSR matrix A; its dimension n is A.n.
+template <typename i_t, typename f_t>
+sparse_vector_t<i_t, f_t>::sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row)
+{
+  n               = A.n;
+  const i_t first = A.row_start[row];
+  const i_t last  = A.row_start[row + 1];
+  i.reserve(last - first);
+  x.reserve(last - first);
+  for (i_t p = first; p < last; ++p) {
+    i.push_back(A.j[p]);
+    x.push_back(A.x[p]);
+  }
+}
+
 template <typename i_t, typename f_t>
 void sparse_vector_t<i_t, f_t>::from_dense(const std::vector<f_t>& in)
 {
@@ -106,6 +121,17 @@ void sparse_vector_t<i_t, f_t>::inverse_permute_vector(const std::vector<i_t>& p
   y.i = i_perm;
 }
 
+// Dot product with a dense vector: the sum over stored entries k of x[k] * x_dense[i[k]].
+// x_dense is indexed by the stored indices i[k]; they are not range-checked here.
+template <typename i_t, typename f_t>
+f_t sparse_vector_t<i_t, f_t>::dot(const std::vector<f_t>& x_dense) const
+{
+  f_t acc         = 0.0;
+  const i_t count = i.size();
+  for (i_t p = 0; p < count; ++p) { acc += x[p] * x_dense[i[p]]; }
+  return acc;
+}
+
 template <typename i_t, typename f_t>
 f_t sparse_vector_t<i_t, f_t>::sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const
 {
@@ -207,6 +233,30 @@ f_t sparse_vector_t<i_t, f_t>::find_coefficient(i_t index) const
   return std::numeric_limits<f_t>::quiet_NaN();
 }
 
+// Copies the entries of this vector whose stored value is nonzero into y, preserving their
+// order. y takes this vector's dimension n; the previous contents of y are discarded.
+template <typename i_t, typename f_t>
+void sparse_vector_t<i_t, f_t>::squeeze(sparse_vector_t<i_t, f_t>& y) const
+{
+  y.n = n;
+  const i_t num_stored = x.size();  // number of stored entries (distinct from the dimension n)
+  i_t num_nonzero      = 0;
+  for (i_t k = 0; k < num_stored; k++) {
+    if (x[k] != 0.0) { num_nonzero++; }
+  }
+
+  y.i.clear();
+  y.x.clear();
+  y.i.reserve(num_nonzero);
+  y.x.reserve(num_nonzero);
+  for (i_t k = 0; k < num_stored; k++) {
+    if (x[k] != 0.0) {
+      y.i.push_back(i[k]);
+      y.x.push_back(x[k]);
+    }
+  }
+}
+
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
 template class sparse_vector_t<int, double>;
 #endif
diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp
index 7acfdc8b5..c56ebf6d9 100644
--- a/cpp/src/dual_simplex/sparse_vector.hpp
+++ b/cpp/src/dual_simplex/sparse_vector.hpp
@@ -25,6 +25,8 @@ class sparse_vector_t {
   sparse_vector_t(const std::vector<f_t>& in) { from_dense(in); }
   // Construct a sparse vector from a column of a CSC matrix
   sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t col);
+  // Construct a sparse vector from a row of a CSR matrix
+  sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row);
   // gather a dense vector into a sparse vector
   void from_dense(const std::vector<f_t>& in);
   // convert a sparse vector into a CSC matrix with a single column
@@ -38,6 +40,8 @@ class sparse_vector_t {
   void inverse_permute_vector(const std::vector<i_t>& p);
   // inverse permute a sparse vector into another sparse vector
   void inverse_permute_vector(const std::vector<i_t>& p, sparse_vector_t<i_t, f_t>& y) const;
+  // compute the dot product of a sparse vector with a dense vector
+  f_t dot(const std::vector<f_t>& x) const;
   // compute the dot product of a sparse vector with a column of a CSC matrix
   f_t sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const;
   // ensure the coefficients in the sparse vectory are sorted in terms of increasing index
@@ -47,6 +51,8 @@ class sparse_vector_t {
   void negate();
   f_t find_coefficient(i_t index) const;
 
+  void squeeze(sparse_vector_t<i_t, f_t>& y) const;
+
   i_t n;
   std::vector<i_t> i;
   std::vector<f_t> x;
diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu
index 72d75cdc7..ebf617774 100644
--- a/cpp/src/linear_programming/optimization_problem.cu
+++ b/cpp/src/linear_programming/optimization_problem.cu
@@ -158,45 +158,116 @@ void optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
   // Replace Q with Q + Q^T
   i_t qn    = size_offsets - 1;  // Number of variables
   i_t q_nnz = size_indices;
-  Q_offsets_.resize(qn + 1);
-  std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0);
-  Q_indices_.reserve(2 * q_nnz);
-  Q_values_.reserve(2 * q_nnz);
-
-  // TODO: This is very inefficient for large Q matrices
-  // Build a map from (row,col) to value for Q+Q^T
-  std::map<std::pair<i_t, i_t>, f_t> Q_map;
-  for (i_t row = 0; row < qn; ++row) {
-    size_t start = Q_offsets[row];
-    size_t end   = Q_offsets[row + 1];
-    for (size_t idx = start; idx < end; ++idx) {
-      i_t col = Q_indices[idx];
-      f_t val = Q_values[idx];
-      auto ij = std::make_pair(row, col);
-      auto ji = std::make_pair(col, row);
-      Q_map[ij] += val;
-      Q_map[ji] += val;
+
+
+  // Construct H = Q + Q^T in triplet form first
+  // Then convert the triplet to CSR
+
+  std::vector<i_t> H_i;
+  std::vector<i_t> H_j;
+  std::vector<f_t> H_x;
+
+  H_i.reserve(2 * q_nnz);
+  H_j.reserve(2 * q_nnz);
+  H_x.reserve(2 * q_nnz);
+
+  for (i_t i = 0; i < qn; ++i) {
+    i_t row_start = Q_offsets[i];
+    i_t row_end   = Q_offsets[i + 1];
+    for (i_t p = row_start; p < row_end; ++p) {
+      i_t j = Q_indices[p];
+      f_t x = Q_values[p];
+      // Add H(i,j)
+      H_i.push_back(i);
+      H_j.push_back(j);
+      H_x.push_back(x);
+      // Add H(j,i)
+      H_i.push_back(j);
+      H_j.push_back(i);
+      H_x.push_back(x);
     }
   }
 
-  // Write map into CSR format (rows are built in key order, so each row's columns are sorted)
-  for (i_t row = 0; row < qn; ++row) {
-    for (auto it = Q_map.lower_bound(std::make_pair(row, 0));
-         it != Q_map.upper_bound(std::make_pair(row, std::numeric_limits<i_t>::max()));
-         ++it) {
-      i_t col = it->first.second;
-      f_t v   = it->second;
-      if (v != 0.0) {
-        Q_indices_.push_back(col);
-        Q_values_.push_back(v);
-        Q_offsets_[row + 1]++;
+  // Convert H to CSR format
+  // Get row counts
+  i_t H_nz = H_x.size();
+  std::vector<i_t> H_row_counts(qn, 0);
+  for (i_t k = 0; k < H_nz; ++k) {
+    H_row_counts[H_i[k]]++;
+  }
+  std::vector<i_t> H_cumulative_counts(qn + 1, 0);
+  for (i_t k = 0; k < qn; ++k) {
+    H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k];
+  }
+  std::vector<i_t> H_row_starts = H_cumulative_counts;
+  std::vector<i_t> H_map(H_nz);
+  std::vector<i_t> H_indices(H_nz);
+  std::vector<f_t> H_values(H_nz);
+  for (i_t k = 0; k < H_nz; ++k) {
+    const i_t p = H_cumulative_counts[H_i[k]]++;
+    H_map[k] = p;
+  }
+  rmm::device_uvector<i_t> d_H_map(H_nz, stream_view_);
+  rmm::device_uvector<i_t> d_H_j(H_nz, stream_view_);
+  rmm::device_uvector<f_t> d_H_x(H_nz, stream_view_);
+  rmm::device_uvector<i_t> d_H_indices(H_nz, stream_view_);
+  rmm::device_uvector<f_t> d_H_values(H_nz, stream_view_);
+
+  raft::copy(d_H_map.data(), H_map.data(), H_nz, stream_view_);
+  raft::copy(d_H_j.data(), H_j.data(), H_nz, stream_view_);
+  raft::copy(d_H_x.data(), H_x.data(), H_nz, stream_view_);
+  stream_view_.synchronize();
+  thrust::for_each_n(rmm::exec_policy(stream_view_),
+                     thrust::make_counting_iterator<i_t>(0),
+                     H_nz,
+                     [span_H_map = cuopt::make_span(d_H_map),
+                      span_H_j = cuopt::make_span(d_H_j),
+                      span_H_indices = cuopt::make_span(d_H_indices)] __device__(i_t k) {
+                      span_H_indices[span_H_map[k]] = span_H_j[k];
+                     });
+  thrust::for_each_n(rmm::exec_policy(stream_view_),
+                     thrust::make_counting_iterator<i_t>(0),
+                     H_nz,
+                     [span_H_map = cuopt::make_span(d_H_map),
+                      span_H_x = cuopt::make_span(d_H_x),
+                      span_H_values = cuopt::make_span(d_H_values)] __device__(i_t k) {
+                      span_H_values[span_H_map[k]] = span_H_x[k];
+                     });
+
+  raft::copy(H_indices.data(), d_H_indices.data(), H_nz, stream_view_);
+  raft::copy(H_values.data(), d_H_values.data(), H_nz, stream_view_);
+  stream_view_.synchronize();
+
+  // H_row_starts, H_indices, H_values are the CSR representation of H
+  // But this contains duplicate entries
+
+  std::vector<i_t> workspace(qn, -1);
+  Q_offsets_.resize(qn + 1);
+  std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0);
+  Q_indices_.resize(H_nz);
+  Q_values_.resize(H_nz);
+  i_t nz = 0;
+  for (i_t i = 0; i < qn; ++i)
+  {
+    i_t q = nz;                                 // row i will start at q
+    const i_t row_start = H_row_starts[i];
+    const i_t row_end = H_row_starts[i + 1];
+    for (i_t p = row_start; p < row_end; ++p) {
+      i_t j = H_indices[p];
+      if (workspace[j] >= q) {
+        Q_values_[workspace[j]] += H_values[p];  // H(i,j) is duplicate
+      } else {
+        workspace[j] = nz;                      // record where column j occurs
+        Q_indices_[nz] = j;                     // keep H(i,j)
+        Q_values_[nz] = H_values[p];
+        nz++;
       }
     }
+    Q_offsets_[i] = q;                          // record start of row i
   }
-  // Convert Q_offsets_new to cumulative sum
-  for (i_t i = 0; i < qn; ++i) {
-    Q_offsets_[i + 1] += Q_offsets_[i];
-  }
+  Q_offsets_[qn] = nz;                          // finalize Q
+  Q_indices_.resize(nz);
+  Q_values_.resize(nz);
 
   // FIX ME:: check for positive semi definite matrix
 }
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index 4e3dc6465..8ae1fa51b 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -87,6 +87,9 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_DUALIZE, &pdlp_settings.dualize, -1, 1, -1},
     {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1},
     {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1},
+    {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits<i_t>::max(), 10},
+    {CUOPT_MIP_NODE_LIMIT, &mip_settings.node_limit, 0, std::numeric_limits<i_t>::max(), std::numeric_limits<i_t>::max()},
+    {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits<i_t>::max(), -1},
     {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1},
     {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}
   };
diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu
index 483ffeb68..823f14681 100644
--- a/cpp/src/mip/diversity/diversity_manager.cu
+++ b/cpp/src/mip/diversity/diversity_manager.cu
@@ -470,7 +470,7 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
     run_fj_alone(sol);
     return sol;
   }
-  rins.enable();
+  //rins.enable();  // NOTE(review): the rins.enable() call is commented out by this change — confirm this is intended and not a debugging leftover
 
   generate_solution(timer.remaining_time(), false);
   if (timer.check_time_limit()) {
diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu
index b7e3a5331..9a125160c 100644
--- a/cpp/src/mip/diversity/lns/rins.cu
+++ b/cpp/src/mip/diversity/lns/rins.cu
@@ -262,6 +262,7 @@ void rins_t<i_t, f_t>::run_rins()
   branch_and_bound_settings.num_diving_threads = 1;
   branch_and_bound_settings.log.log            = false;
   branch_and_bound_settings.log.log_prefix     = "[RINS] ";
+  branch_and_bound_settings.max_cut_passes     = 0;
   branch_and_bound_settings.solution_callback  = [this, &rins_solution_queue](
                                                   std::vector<f_t>& solution, f_t objective) {
     rins_solution_queue.push_back(solution);
diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
index 5be807372..0000ccd29 100644
--- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh
+++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
@@ -86,7 +86,7 @@ class sub_mip_recombiner_t : public recombiner_t<i_t, f_t> {
     trivial_presolve(fixed_problem);
     fixed_problem.check_problem_representation(true);
     // brute force rounding threshold is 8
-    const bool run_sub_mip                             = fixed_problem.n_integer_vars > 8;
+    const bool run_sub_mip                             = 0 && fixed_problem.n_integer_vars > 8;  // NOTE(review): "0 &&" forces run_sub_mip to false — confirm intended
     dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET;
     dual_simplex::mip_solution_t<i_t, f_t> branch_and_bound_solution(1);
     if (run_sub_mip) {
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index 0da4c6398..b6ffd04f1 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -109,7 +109,8 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
 
   diversity_manager_t<i_t, f_t> dm(context);
   dm.timer              = timer_;
-  bool presolve_success = dm.run_presolve(timer_.remaining_time());
+  //bool presolve_success = dm.run_presolve(timer_.remaining_time());
+  bool presolve_success = true;  // NOTE(review): run_presolve call is commented out above, so the !presolve_success branch below cannot run — confirm intended
   if (!presolve_success) {
     CUOPT_LOG_INFO("Problem proven infeasible in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
@@ -117,7 +118,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     context.problem_ptr->post_process_solution(sol);
     return sol;
   }
-  if (context.problem_ptr->empty) {
+  if (0 && context.problem_ptr->empty) {  // NOTE(review): "0 &&" makes this branch unreachable — confirm intended
     CUOPT_LOG_INFO("Problem full reduced in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
     sol.set_problem_fully_reduced();
@@ -126,7 +127,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
   }
 
   // if the problem was reduced to a LP: run concurrent LP
-  if (context.problem_ptr->n_integer_vars == 0) {
+  if (0 && context.problem_ptr->n_integer_vars == 0) {  // NOTE(review): "0 &&" makes this branch unreachable — confirm intended
     CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP");
     pdlp_solver_settings_t<i_t, f_t> settings{};
     settings.time_limit = timer_.remaining_time();
@@ -162,10 +163,13 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
 
     // Fill in the settings for branch and bound
     branch_and_bound_settings.time_limit           = timer_.remaining_time();
+    branch_and_bound_settings.node_limit           = context.settings.node_limit;
+    branch_and_bound_settings.reliability_branching = context.settings.reliability_branching;
     branch_and_bound_settings.print_presolve_stats = false;
     branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap;
     branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap;
     branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance;
+    branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes;
 
     if (context.settings.num_cpu_threads < 0) {
       branch_and_bound_settings.num_threads = omp_get_max_threads() - 1;
@@ -233,6 +237,9 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
                                                 std::ref(branch_and_bound_solution));
   }
 
+  //auto bb_status = branch_and_bound_status_future.get();
+  //CUOPT_LOG_INFO("BB status: %d", bb_status);
+
   // Start the primal heuristics
   auto sol = dm.run_solver();
   if (!context.settings.heuristics_only) {
diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index 7aed72fe0..66a2347d1 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -326,4 +326,155 @@ TEST(dual_simplex, dual_variable_greater_than)
   EXPECT_NEAR(solution.z[1], 0.0, 1e-6);
 }
 
+#if 0  // simple_cuts is compiled out: this guard disables the whole test below
+TEST(dual_simplex, simple_cuts)
+{
+  // LP under test (all three variables are declared CONTINUOUS below): minimize x + y + 2 z
+  // subject to x + y + z == 1
+  //            x, y, z >= 0
+
+  raft::handle_t handle{};
+  cuopt::linear_programming::dual_simplex::user_problem_t<int, double> user_problem(&handle);
+  constexpr int m  = 1;  // number of rows, stored in num_rows / A.m
+  constexpr int n  = 3;  // number of columns, stored in num_cols / A.n
+  constexpr int nz = 3;  // number of stored entries of A
+
+  user_problem.num_rows = m;
+  user_problem.num_cols = n;
+  user_problem.objective.resize(n);
+  user_problem.objective[0] = 1.0;
+  user_problem.objective[1] = 1.0;
+  user_problem.objective[2] = 2.0;
+  user_problem.A.m          = m;
+  user_problem.A.n          = n;
+  user_problem.A.nz_max     = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start.resize(n + 1);  // presumably compressed-column storage: one offset per column plus an end marker
+  user_problem.A.col_start[0] = 0;
+  user_problem.A.col_start[1] = 1;
+  user_problem.A.col_start[2] = 2;
+  user_problem.A.col_start[3] = 3;
+  user_problem.A.i[0]         = 0;  // every stored entry sits in row 0 ...
+  user_problem.A.x[0]         = 1.0;  // ... with value 1.0, matching x + y + z
+  user_problem.A.i[1]         = 0;
+  user_problem.A.x[1]         = 1.0;
+  user_problem.A.i[2]         = 0;
+  user_problem.A.x[2]         = 1.0;
+  user_problem.lower.resize(n, 0.0);
+  user_problem.upper.resize(n, dual_simplex::inf);
+  user_problem.num_range_rows = 0;
+  user_problem.problem_name   = "simple_cuts";
+  user_problem.obj_scale      = 1.0;
+  user_problem.obj_constant   = 0.0;
+  user_problem.rhs.resize(m, 1.0);
+  user_problem.row_sense.resize(m, 'E');  // 'E' pairs with the "== 1" constraint stated above
+  user_problem.var_types.resize(
+    n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS);
+
+  cuopt::init_logger_t logger("", true);
+
+  // NOTE(review): the 1, 1, 1 arguments look like placeholder sizes that convert_user_problem replaces - confirm
+  cuopt::linear_programming::dual_simplex::lp_problem_t<int, double> lp(
+    user_problem.handle_ptr, 1, 1, 1);
+  cuopt::linear_programming::dual_simplex::simplex_solver_settings_t<int, double> settings;
+  settings.barrier            = false;
+  settings.barrier_presolve   = false;
+  settings.log.log            = true;
+  settings.log.log_to_console = true;
+  settings.log.printf("Test print\n");
+  std::vector<int> new_slacks;
+  cuopt::linear_programming::dual_simplex::dualize_info_t<int, double> dualize_info;
+  cuopt::linear_programming::dual_simplex::convert_user_problem(
+    user_problem, settings, lp, new_slacks, dualize_info);
+  cuopt::linear_programming::dual_simplex::lp_solution_t<int, double> solution(lp.num_rows,
+                                                                               lp.num_cols);
+  std::vector<cuopt::linear_programming::dual_simplex::variable_status_t> vstatus;
+  std::vector<double> edge_norms;
+  std::vector<int> basic_list(lp.num_rows);
+  std::vector<int> nonbasic_list;
+  cuopt::linear_programming::dual_simplex::basis_update_mpf_t<int, double> basis_update(
+    lp.num_cols, settings.refactor_frequency);
+  double start_time = dual_simplex::tic();
+  printf("Calling solve linear program with advanced basis\n");
+  EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve_linear_program_with_advanced_basis(
+              lp,
+              start_time,
+              settings,
+              solution,
+              basis_update,
+              basic_list,
+              nonbasic_list,
+              vstatus,
+              edge_norms)),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  printf("Solution y: %e\n", solution.y[0]);
+  printf("Solution z: %e %e %e\n", solution.z[0], solution.z[1], solution.z[2]);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-6);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-6);  // NOTE(review): any x + y = 1, z = 0 is optimal, so this depends on the vertex returned
+
+  // Add a cut z >= 1/3. It has to be in the form C*x <= d, so it is written as -z <= -1/3.
+  csr_matrix_t<int, double> cuts(1, n, 1);  // presumably (rows, cols, nonzeros) - confirm against csr_matrix_t
+  cuts.row_start[0] = 0;
+  cuts.j[0]         = 2;  // column index 2, i.e. z
+  cuts.x[0]         = -1.0;
+  cuts.row_start[1] = 1;
+  printf("cuts m %d n %d\n", cuts.m, cuts.n);
+  std::vector<double> cut_rhs(1);
+  cut_rhs[0] = -1.0 / 3.0;
+
+  std::vector<variable_type_t> var_types;  // passed to the cut solves while still empty
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time,
+                                                                                    settings,
+                                                                                    cuts,
+                                                                                    cut_rhs,
+                                                                                    lp,
+                                                                                    solution,
+                                                                                    basis_update,
+                                                                                    basic_list,
+                                                                                    nonbasic_list,
+                                                                                    vstatus,
+                                                                                    edge_norms,
+                                                                                    var_types),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);  // z = 1/3 and x + y = 2/3 give 2/3 + 2 * (1/3)
+
+  cuts.row_start.resize(3);
+  cuts.j.resize(2);
+  cuts.x.resize(2);
+  // Row 0 is retargeted to y: assuming resize keeps the -1.0 coefficient, with rhs -1/3 it reads y >= 1/3
+  cuts.j[0]         = 1;
+  cuts.row_start[2] = 2;  // row 1 covers stored entry 1
+  // Row 1: cut x <= 0.0 (coefficient +1 on column 0, rhs 0)
+  cuts.j[1]         = 0;
+  cuts.x[1]         = 1.0;
+  cuts.m            = 2;
+  cut_rhs.resize(2);
+  cut_rhs[1] = 0.0;
+
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time,
+                                                                                    settings,
+                                                                                    cuts,
+                                                                                    cut_rhs,
+                                                                                    lp,
+                                                                                    solution,
+                                                                                    basis_update,
+                                                                                    basic_list,
+                                                                                    nonbasic_list,
+                                                                                    vstatus,
+                                                                                    edge_norms,
+                                                                                    var_types),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);  // NOTE(review): only holds if the earlier cut z >= 1/3 is still in lp - confirm
+  EXPECT_NEAR(solution.x[0], 0.0, 1e-6);
+  EXPECT_NEAR(solution.x[1], 2.0 / 3.0, 1e-6);
+  EXPECT_NEAR(solution.x[2], 1.0 / 3.0, 1e-6);
+
+}
+#endif  // disabled simple_cuts test
+
 }  // namespace cuopt::linear_programming::dual_simplex::test