Add clang host build and ThreadSanitizer support, race condition fixes #603
```diff
@@ -44,6 +44,48 @@ namespace cuopt::linear_programming::dual_simplex {
 
 auto constexpr use_gpu = true;
 
+// non-template wrappers to work around clang compiler bug
+[[maybe_unused]] static void pairwise_multiply(
+  float* a, float* b, float* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+}
+
+[[maybe_unused]] static void pairwise_multiply(
+  double* a, double* b, double* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+}
+
+[[maybe_unused]] static void axpy(
+  float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(x, y),
+    out,
+    size,
+    [alpha, beta] __host__ __device__(float a, float b) { return alpha * a + beta * b; },
+    stream);
+}
+
+[[maybe_unused]] static void axpy(double alpha,
+                                  double* x,
+                                  double beta,
+                                  double* y,
+                                  double* out,
+                                  int size,
+                                  rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(x, y),
+    out,
+    size,
+    [alpha, beta] __host__ __device__(double a, double b) { return alpha * a + beta * b; },
+    stream);
+}
```
Comment on lines +47 to +87:
**Add CUDA error checking and improve bug documentation.**

The wrapper functions lack CUDA error checking, which violates the coding guideline requiring verification for all CUDA operations. Additionally, the comment about a "clang compiler bug" is vague and provides no context for future maintainers. As per coding guidelines, every CUDA operation must have error checking. Apply this diff to add error checking:

```diff
 [[maybe_unused]] static void pairwise_multiply(
   float* a, float* b, float* out, int size, rmm::cuda_stream_view stream)
 {
-  cub::DeviceTransform::Transform(
-    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+  RAFT_CUDA_TRY(cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream));
+  RAFT_CHECK_CUDA(stream);
 }

 [[maybe_unused]] static void pairwise_multiply(
   double* a, double* b, double* out, int size, rmm::cuda_stream_view stream)
 {
-  cub::DeviceTransform::Transform(
-    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+  RAFT_CUDA_TRY(cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream));
+  RAFT_CHECK_CUDA(stream);
 }

 [[maybe_unused]] static void axpy(
   float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream)
 {
-  cub::DeviceTransform::Transform(
+  RAFT_CUDA_TRY(cub::DeviceTransform::Transform(
     cuda::std::make_tuple(x, y),
     out,
     size,
     [alpha, beta] __host__ __device__(float a, float b) { return alpha * a + beta * b; },
-    stream);
+    stream));
+  RAFT_CHECK_CUDA(stream);
 }

 [[maybe_unused]] static void axpy(double alpha,
                                   double* x,
                                   double beta,
                                   double* y,
                                   double* out,
                                   int size,
                                   rmm::cuda_stream_view stream)
 {
-  cub::DeviceTransform::Transform(
+  RAFT_CUDA_TRY(cub::DeviceTransform::Transform(
     cuda::std::make_tuple(x, y),
     out,
     size,
     [alpha, beta] __host__ __device__(double a, double b) { return alpha * a + beta * b; },
-    stream);
+    stream));
+  RAFT_CHECK_CUDA(stream);
 }
```

Also improve the comment to document the specific issue:

```diff
-// non-template wrappers to work around clang compiler bug
+// Non-template wrappers to work around a clang compiler issue with thrust::transform template
+// instantiation. These use cub::DeviceTransform directly with explicit float/double overloads
+// instead of templates to ensure proper compilation with clang when ThreadSanitizer is enabled.
```

Consider whether a templated version with explicit instantiation could work once the clang/TSan build is stable:

```cpp
template <typename T>
[[maybe_unused]] static void pairwise_multiply(
  T* a, T* b, T* out, int size, rmm::cuda_stream_view stream)
{
  RAFT_CUDA_TRY(cub::DeviceTransform::Transform(
    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream));
  RAFT_CHECK_CUDA(stream);
}

// Explicit instantiations
template void pairwise_multiply<float>(float*, float*, float*, int, rmm::cuda_stream_view);
template void pairwise_multiply<double>(double*, double*, double*, int, rmm::cuda_stream_view);
```
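For orientation, here is a minimal sketch of how the new overloads would be driven from host code. The harness name, buffer names, sizes, and fill step are illustrative, not from the PR; it assumes the float overloads above are in scope:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Hypothetical harness (not part of the PR): exercises the float overloads of
// pairwise_multiply and axpy on the default stream.
void wrapper_usage_example()
{
  rmm::cuda_stream_view stream{};  // default CUDA stream
  int const n = 1024;
  rmm::device_uvector<float> a(n, stream);
  rmm::device_uvector<float> b(n, stream);
  rmm::device_uvector<float> out(n, stream);
  // ... fill a and b with device data ...
  pairwise_multiply(a.data(), b.data(), out.data(), n, stream);   // out[i] = a[i] * b[i]
  axpy(2.0f, a.data(), 1.0f, out.data(), out.data(), n, stream);  // out[i] = 2*a[i] + out[i]
  stream.synchronize();  // wait for the async transforms before reading results back
}
```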
The diff then continues into the class that consumes these helpers:

```diff
 template <typename i_t, typename f_t>
 class iteration_data_t {
  public:
```
```diff
@@ -1404,12 +1446,7 @@ class iteration_data_t {
 
   // diag.pairwise_product(x1, r1);
   // r1 <- D * x_1
-  thrust::transform(handle_ptr->get_thrust_policy(),
-                    d_x1.data(),
-                    d_x1.data() + n,
-                    d_diag_.data(),
-                    d_r1.data(),
-                    thrust::multiplies<f_t>());
+  pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_);
 
   // r1 <- Q x1 + D x1
   if (Q.n > 0) {
```
```diff
@@ -1419,12 +1456,7 @@ class iteration_data_t {
 
   // y1 <- - alpha * r1 + beta * y1
   // y1.axpy(-alpha, r1, beta);
-  thrust::transform(handle_ptr->get_thrust_policy(),
-                    d_r1.data(),
-                    d_r1.data() + n,
-                    d_y1.data(),
-                    d_y1.data(),
-                    axpy_op<f_t>{-alpha, beta});
+  axpy(-alpha, d_r1.data(), beta, d_y1.data(), d_y1.data(), n, stream_view_);
 
   // matrix_transpose_vector_multiply(A, alpha, x2, 1.0, y1);
   cusparse_view_.transpose_spmv(alpha, d_x2, 1.0, d_y1);
```
||
**ThreadSanitizer integration is fine, but the TSAN_OPTIONS comment has typos.**

The BUILD_TSAN block cleanly wires in `-fsanitize=thread -fno-omit-frame-pointer -g` and matching link flags behind a simple CMake option, which is what build.sh expects. That's a solid pattern.

In the usage comment, though, the TSAN_OPTIONS example contains typos:

```sh
TSAN_OPTIONS='suppresions=cpp/tsan_suppressions.txt:...'
# and: libarcher.so must be presetn
```

For discoverability, consider correcting this to:

```sh
TSAN_OPTIONS='suppressions=cpp/tsan_suppressions.txt:...'
# and: libarcher.so must be present
```

This avoids users copy-pasting an invalid TSAN_OPTIONS key.