Skip to content

Commit 6dce716

Browse files
committed
Merge branch 'main' into HEAD
2 parents b240e36 + 79d726f commit 6dce716

File tree

13 files changed

+488
-120
lines changed

13 files changed

+488
-120
lines changed

.github/workflows/ci.gpu.yml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,14 +51,19 @@ jobs:
5151
-DCMAKE_CUDA_COMPILER="$cxx" \
5252
-DCMAKE_CUDA_ARCHITECTURES=${{ matrix.sm }};
5353
# Compile
54-
cmake --build build;
54+
cmake --build build -v;
55+
56+
# Print sccache stats
57+
sccache -s
58+
5559
# Tests
5660
ctest --test-dir build --verbose --output-on-failure --timeout 60;
5761
# Examples
5862
./build/examples/nvexec/maxwell_cpu_st --iterations=1000 --N=512 --run-cpp --run-inline-scheduler
5963
./build/examples/nvexec/maxwell_cpu_mt --iterations=1000 --N=512 --run-std --run-stdpar --run-thread-pool-scheduler
6064
./build/examples/nvexec/maxwell_gpu_s --iterations=1000 --N=512 --run-cuda --run-stdpar --run-stream-scheduler
6165
66+
6267
ci-gpu:
6368
runs-on: ubuntu-latest
6469
name: CI (GPU)

.vscode/launch.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,17 @@
4747
"initCommands": ["settings set target.disable-aslr false"],
4848
"args": "${input:CXX_PROGRAM_ARGS}",
4949
},
50+
{
51+
"name": "CUDA Current Target (cuda-gdb)",
52+
"type": "cuda-gdb",
53+
"request": "launch",
54+
"stopAtEntry": false,
55+
"breakOnLaunch": false,
56+
"internalConsoleOptions": "neverOpen",
57+
"program": "${command:cmake.launchTargetPath}",
58+
"cwd": "${command:cmake.launchTargetDirectory}",
59+
"args": "${input:CXX_PROGRAM_ARGS}",
60+
},
5061
],
5162
"inputs": [
5263
{

CMakeLists.txt

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,12 @@ target_compile_options(stdexec INTERFACE
143143
$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/Zc:__cplusplus /Zc:preprocessor>
144144
)
145145

146+
option(STDEXEC_ENABLE_EXTRA_TYPE_CHECKING "Enable extra type checking that is costly at compile-time" OFF)
147+
148+
if (STDEXEC_ENABLE_EXTRA_TYPE_CHECKING)
149+
target_compile_definitions(stdexec INTERFACE STDEXEC_ENABLE_EXTRA_TYPE_CHECKING)
150+
endif()
151+
146152
add_library(STDEXEC::stdexec ALIAS stdexec)
147153

148154
# Don't require building everything when installing
@@ -199,6 +205,17 @@ target_compile_options(stdexec_executable_flags INTERFACE
199205
-include stdexec/__detail/__force_include.hpp>
200206
)
201207

208+
target_compile_definitions(stdexec_executable_flags INTERFACE
209+
STDEXEC_ENABLE_EXTRA_TYPE_CHECKING)
210+
211+
# Support target for examples and tests
212+
add_library(nvexec_executable_flags INTERFACE)
213+
214+
target_compile_options(nvexec_executable_flags INTERFACE
215+
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)
216+
target_link_options(nvexec_executable_flags INTERFACE
217+
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)
218+
202219
# Set up nvexec library
203220
option(STDEXEC_ENABLE_CUDA "Enable CUDA targets for non-nvc++ compilers" OFF)
204221
if(CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
@@ -220,9 +237,9 @@ if(STDEXEC_ENABLE_CUDA)
220237
target_link_libraries(nvexec INTERFACE STDEXEC::stdexec)
221238

222239
target_compile_options(nvexec INTERFACE
223-
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
240+
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
224241
target_link_options(nvexec INTERFACE
225-
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
242+
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
226243

227244
if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC"))
228245
include(rapids-cuda)

examples/nvexec/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,9 @@ add_library(nvexec_example INTERFACE)
4040
target_include_directories(nvexec_example
4141
INTERFACE ${CMAKE_CURRENT_LIST_DIR}
4242
)
43+
target_link_libraries(nvexec_example
44+
INTERFACE nvexec_executable_flags
45+
)
4346

4447
add_library(stdpar_multicore INTERFACE)
4548
target_include_directories(stdpar_multicore

include/exec/inline_scheduler.hpp

Lines changed: 1 addition & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -22,52 +22,5 @@
2222
namespace exec {
2323
// A simple scheduler that executes its continuation inline, on the
2424
// thread of the caller of start().
25-
struct inline_scheduler {
26-
template <class R_>
27-
struct __op {
28-
using R = stdexec::__t<R_>;
29-
STDEXEC_NO_UNIQUE_ADDRESS R rec_;
30-
31-
friend void tag_invoke(stdexec::start_t, __op& op) noexcept {
32-
stdexec::set_value((R&&) op.rec_);
33-
}
34-
};
35-
36-
struct __sender {
37-
using is_sender = void;
38-
using completion_signatures = stdexec::completion_signatures<stdexec::set_value_t()>;
39-
40-
template <class R>
41-
friend auto tag_invoke(stdexec::connect_t, __sender, R&& rec) //
42-
noexcept(stdexec::__nothrow_constructible_from<stdexec::__decay_t<R>, R>)
43-
-> __op<stdexec::__x<stdexec::__decay_t<R>>> {
44-
return {(R&&) rec};
45-
}
46-
47-
struct __env {
48-
friend inline_scheduler
49-
tag_invoke(stdexec::get_completion_scheduler_t<stdexec::set_value_t>, const __env&) //
50-
noexcept {
51-
return {};
52-
}
53-
};
54-
55-
friend __env tag_invoke(stdexec::get_env_t, const __sender&) noexcept {
56-
return {};
57-
}
58-
};
59-
60-
STDEXEC_DETAIL_CUDACC_HOST_DEVICE //
61-
friend __sender
62-
tag_invoke(stdexec::schedule_t, const inline_scheduler&) noexcept {
63-
return {};
64-
}
65-
66-
friend stdexec::forward_progress_guarantee
67-
tag_invoke(stdexec::get_forward_progress_guarantee_t, const inline_scheduler&) noexcept {
68-
return stdexec::forward_progress_guarantee::weakly_parallel;
69-
}
70-
71-
bool operator==(const inline_scheduler&) const noexcept = default;
72-
};
25+
using inline_scheduler = stdexec::__inln::__scheduler;
7326
}
Lines changed: 204 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,204 @@
1+
/*
2+
* Copyright (c) 2023 Maikel Nadolski
3+
* Copyright (c) 2023 NVIDIA Corporation
4+
*
5+
* Licensed under the Apache License Version 2.0 with LLVM Exceptions
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* https://llvm.org/LICENSE.txt
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
#pragma once
18+
19+
#include "../sequence_senders.hpp"
20+
21+
#include "../__detail/__basic_sequence.hpp"
22+
23+
namespace exec {
24+
namespace __transform_each {
25+
using namespace stdexec;
26+
27+
// State that the transform_each receiver reaches through its __op_ pointer:
// the downstream receiver and the adaptor that is called on each item.
template <class _Receiver, class _Adaptor>
28+
struct __operation_base {
29+
_Receiver __receiver_;
30+
_Adaptor __adaptor_;
31+
};
32+
33+
// Receiver used by transform_each. The nested __t stores a pointer to an
// __operation_base and forwards every signal to the __receiver_ stored there.
template <class _ReceiverId, class _Adaptor>
34+
struct __receiver {
35+
using _Receiver = stdexec::__t<_ReceiverId>;
36+
37+
struct __t {
38+
using is_receiver = void;
39+
using __id = __receiver;
40+
__operation_base<_Receiver, _Adaptor>* __op_;
41+
// set_next: call the stored adaptor on the incoming item and hand the result
// to exec::set_next on the stored receiver. The overload participates only
// when both calls are well-formed and is noexcept exactly when both are.
42+
template <same_as<set_next_t> _SetNext, same_as<__t> _Self, class _Item>
43+
requires __callable<_Adaptor&, _Item>
44+
&& __callable<exec::set_next_t, _Receiver&, __call_result_t<_Adaptor&, _Item>>
45+
friend auto tag_invoke(_SetNext, _Self& __self, _Item&& __item) noexcept(
46+
__nothrow_callable<_SetNext, _Receiver&, __call_result_t<_Adaptor&, _Item>> //
47+
&& __nothrow_callable<_Adaptor&, _Item>)
48+
-> next_sender_of_t<_Receiver, __call_result_t<_Adaptor&, _Item>> {
49+
return exec::set_next(
50+
__self.__op_->__receiver_, __self.__op_->__adaptor_(static_cast<_Item&&>(__item)));
51+
}
52+
// set_value: forward to the stored receiver, cast to an rvalue.
// NOTE(review): this overload has no requires-clause, unlike set_stopped and
// set_error below -- confirm that the asymmetry is intentional.
53+
template <same_as<set_value_t> _SetValue, same_as<__t> _Self>
54+
friend void tag_invoke(_SetValue, _Self&& __self) noexcept {
55+
stdexec::set_value(static_cast<_Receiver&&>(__self.__op_->__receiver_));
56+
}
57+
// set_stopped: forward to the stored receiver; available only when the
// receiver can be called with set_stopped.
58+
template <same_as<set_stopped_t> _SetStopped, same_as<__t> _Self>
59+
requires __callable<_SetStopped, _Receiver&&>
60+
friend void tag_invoke(_SetStopped, _Self&& __self) noexcept {
61+
stdexec::set_stopped(static_cast<_Receiver&&>(__self.__op_->__receiver_));
62+
}
63+
// set_error: forward the error object unchanged to the stored receiver;
// available only when the receiver can be called with this error type.
64+
template <same_as<set_error_t> _SetError, same_as<__t> _Self, class _Error>
65+
requires __callable<_SetError, _Receiver&&, _Error>
66+
friend void tag_invoke(_SetError, _Self&& __self, _Error&& __error) noexcept {
67+
stdexec::set_error(
68+
static_cast<_Receiver&&>(__self.__op_->__receiver_), static_cast<_Error&&>(__error));
69+
}
70+
// get_env: return the environment of the stored receiver.
71+
template <same_as<get_env_t> _GetEnv, __decays_to<__t> _Self>
72+
friend env_of_t<_Receiver> tag_invoke(_GetEnv, _Self&& __self) noexcept {
73+
return stdexec::get_env(__self.__op_->__receiver_);
74+
}
75+
};
76+
};
77+
78+
// Operation state for transform_each. The nested __t derives from
// __operation_base (which stores the receiver and the adaptor) and also holds
// the operation state obtained by subscribing to the child sequence.
template <class _Sender, class _ReceiverId, class _Adaptor>
79+
struct __operation {
80+
using _Receiver = stdexec::__t<_ReceiverId>;
81+
82+
struct __t : __operation_base<_Receiver, _Adaptor> {
83+
using __id = __operation;
84+
subscribe_result_t<_Sender, stdexec::__t<__receiver<_ReceiverId, _Adaptor>>> __op_;
85+
// Initializes the base from __rcvr and __adaptor, then subscribes to __sndr
// with a __receiver constructed from `this`; that receiver therefore points
// back into this object.
86+
__t(_Sender&& __sndr, _Receiver __rcvr, _Adaptor __adaptor)
87+
: __operation_base<
88+
_Receiver,
89+
_Adaptor>{static_cast<_Receiver&&>(__rcvr), static_cast<_Adaptor&&>(__adaptor)}
90+
, __op_{exec::subscribe(
91+
static_cast<_Sender&&>(__sndr),
92+
stdexec::__t<__receiver<_ReceiverId, _Adaptor>>{this})} {
93+
}
94+
// start: start the operation state returned by exec::subscribe.
95+
friend void tag_invoke(start_t, __t& __self) noexcept {
96+
stdexec::start(__self.__op_);
97+
}
98+
};
99+
};
100+
101+
// Function object that transform_each_t::subscribe passes to apply_sender.
// It holds a reference to the receiver and, when called, builds the
// __operation from the sequence, the receiver and the adaptor.
template <class _Receiver>
102+
struct __subscribe_fn {
103+
_Receiver& __rcvr_;
104+
// The first argument is ignored. __rcvr_ is cast to an rvalue here, so the
// referenced receiver may be moved from by this call.
105+
template <class _Adaptor, class _Sequence>
106+
auto operator()(__ignore, _Adaptor __adaptor, _Sequence&& __sequence) noexcept(
107+
__nothrow_decay_copyable<_Adaptor> && __nothrow_decay_copyable<_Sequence>
108+
&& __nothrow_decay_copyable<_Receiver>)
109+
-> __t< __operation<_Sequence, __id<_Receiver>, _Adaptor>> {
110+
return {
111+
static_cast<_Sequence&&>(__sequence),
112+
static_cast<_Receiver&&>(__rcvr_),
113+
static_cast<_Adaptor&&>(__adaptor)};
114+
}
115+
};
116+
117+
// Empty tag template naming the adaptor type; used as a template argument of
// the __mexception returned by the unconstrained __try_call overload.
template <class _Adaptor>
118+
struct _NOT_CALLABLE_ADAPTOR_ { };
119+
// Empty tag template naming the item type; used alongside
// _NOT_CALLABLE_ADAPTOR_ in that same __mexception.
120+
template <class _Item>
121+
struct _WITH_ITEM_SENDER_ { };
122+
123+
// Declaration-only overload pair, meant for use in unevaluated contexts.
// Fallback: the return type is an __mexception that names the adaptor and the
// item type.
template <class _Adaptor, class _Item>
124+
auto __try_call(_Item*)
125+
-> stdexec::__mexception<_NOT_CALLABLE_ADAPTOR_<_Adaptor&>, _WITH_ITEM_SENDER_<stdexec::__name_of<_Item>>>;
126+
// Constrained overload: when _Adaptor& is callable with _Item, this more
// constrained declaration is selected and the return type is __msuccess.
127+
template <class _Adaptor, class _Item>
128+
requires stdexec::__callable<_Adaptor&, _Item>
129+
stdexec::__msuccess __try_call(_Item*);
130+
131+
// Declaration-only helper: its return type is the type of a left fold with &&
// over __msuccess() and the __try_call result for every item type in the list.
// Presumably the fold yields an __mexception as soon as one item fails; the &&
// operator involved is defined outside this file -- TODO confirm.
template <class _Adaptor, class... _Items>
132+
auto __try_calls(item_types<_Items...>*)
133+
-> decltype((stdexec::__msuccess() && ... && __try_call<_Adaptor>((_Items*) nullptr)));
134+
135+
// Satisfied when __try_calls, instantiated with the decayed adaptor type and
// the given item-type list, has a result type that models stdexec::__ok.
// NOTE(review): "callabale" looks like a misspelling of "callable"; the name is
// also used by transform_each_t::subscribe, so rename both together if at all.
template <class _Adaptor, class _Items>
136+
concept __callabale_adaptor_for = requires(_Items* __items) {
137+
{ __try_calls<stdexec::__decay_t<_Adaptor>>(__items) } -> stdexec::__ok;
138+
};
139+
140+
// Tag type and function object for the transform_each sequence adaptor. It
// creates the sequence expression and provides the static customizations
// (completion signatures, item types, subscribe, get_env) for expressions
// tagged with transform_each_t.
struct transform_each_t {
141+
// Builds the sequence expression. The adaptor is passed first and the
// sequence second, the same order in which __subscribe_fn receives them.
template <sender _Sequence, __sender_adaptor_closure _Adaptor>
142+
auto operator()(_Sequence&& __sndr, _Adaptor&& __adaptor) const noexcept(
143+
__nothrow_decay_copyable<_Sequence> //
144+
&& __nothrow_decay_copyable<_Adaptor>) {
145+
return make_sequence_expr<transform_each_t>(
146+
static_cast<_Adaptor&&>(__adaptor), static_cast<_Sequence&&>(__sndr));
147+
}
148+
// Adaptor-only overload: returns a __binder_back that stores the adaptor.
// Presumably this enables `sequence | transform_each(adaptor)` -- TODO confirm.
149+
template <class _Adaptor>
150+
constexpr auto operator()(_Adaptor __adaptor) const noexcept
151+
-> __binder_back<transform_each_t, _Adaptor> {
152+
return {{}, {}, {static_cast<_Adaptor&&>(__adaptor)}};
153+
}
154+
// Completion signatures are computed from the child sequence alone.
155+
template <class _Self, class _Env>
156+
using __completion_sigs_t = __sequence_completion_signatures_of_t<__child_of<_Self>, _Env>;
157+
158+
template <sender_expr_for<transform_each_t> _Self, class _Env>
159+
static __completion_sigs_t<_Self, _Env> get_completion_signatures(_Self&&, _Env&&) noexcept {
160+
return {};
161+
}
162+
// Item types of the expression: each item type of the child is mapped to
// __call_result_t<adaptor&, item>, and the results are passed through
// __munique into item_types.
163+
template <class _Self, class _Env>
164+
using __item_types_t = stdexec::__mapply<
165+
stdexec::__transform<
166+
stdexec::__mbind_front_q<__call_result_t, __data_of<_Self>&>,
167+
stdexec::__munique<stdexec::__q<item_types>>>,
168+
item_types_of_t<__child_of<_Self>, _Env>>;
169+
170+
template <sender_expr_for<transform_each_t> _Self, class _Env>
171+
static __item_types_t<_Self, _Env> get_item_types(_Self&&, _Env&&) noexcept {
172+
return {};
173+
}
174+
// Receiver and operation types for a given expression/receiver pair; the
// adaptor type is the expression's data.
175+
template <class _Self, class _Receiver>
176+
using __receiver_t = __t<__receiver<__id<_Receiver>, __data_of<_Self>>>;
177+
178+
template <class _Self, class _Receiver>
179+
using __operation_t = __t< __operation<__child_of<_Self>, __id<_Receiver>, __data_of<_Self>>>;
180+
// subscribe: unpacks the expression with apply_sender and lets __subscribe_fn
// construct the operation state.
// NOTE(review): the first constraint checks the adaptor against __item_types_t,
// i.e. the adaptor's own result types, not the child's item types -- confirm
// that this is the intended list.
181+
template <sender_expr_for<transform_each_t> _Self, receiver _Receiver>
182+
requires __callabale_adaptor_for<
183+
__data_of<_Self>,
184+
__item_types_t<_Self, env_of_t<_Receiver>>>
185+
&& sequence_receiver_of<_Receiver, __item_types_t<_Self, env_of_t<_Receiver>>>
186+
&& sequence_sender_to<__child_of<_Self>, __receiver_t<_Self, _Receiver>>
187+
static auto subscribe(_Self&& __self, _Receiver __rcvr) noexcept(
188+
__nothrow_callable<apply_sender_t, _Self, __subscribe_fn<_Receiver>>)
189+
-> __call_result_t<apply_sender_t, _Self, __subscribe_fn<_Receiver>> {
190+
return apply_sender(static_cast<_Self&&>(__self), __subscribe_fn<_Receiver>{__rcvr});
191+
}
192+
// get_env: return the environment of the child sequence; the first two
// arguments supplied by apply_sender are ignored.
193+
template <sender_expr_for<transform_each_t> _Sexpr>
194+
static env_of_t<__child_of<_Sexpr>> get_env(const _Sexpr& __sexpr) noexcept {
195+
return apply_sender(__sexpr, []<class _Child>(__ignore, __ignore, const _Child& __child) {
196+
return stdexec::get_env(__child);
197+
});
198+
}
199+
};
200+
}
201+
202+
// Bring the tag type into the enclosing namespace and define the
// transform_each function object.
using __transform_each::transform_each_t;
203+
inline constexpr transform_each_t transform_each{};
204+
}

include/exec/sequence_senders.hpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -267,7 +267,11 @@ namespace exec {
267267

268268
template <class _Sequence, class _Env>
269269
using __sequence_completion_signatures_of_t = stdexec::__concat_completion_signatures_t<
270-
stdexec::completion_signatures<stdexec::set_value_t()>,
270+
stdexec::__try_make_completion_signatures<
271+
_Sequence,
272+
_Env,
273+
stdexec::completion_signatures<stdexec::set_value_t()>,
274+
stdexec::__mconst<stdexec::completion_signatures<>>>,
271275
stdexec::__mapply<
272276
stdexec::__q<stdexec::__concat_completion_signatures_t>,
273277
stdexec::__mapply<

include/stdexec/__detail/__config.hpp

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -239,14 +239,31 @@
239239
#define STDEXEC_FUN_ARGS(...) STDEXEC_CAT(STDEXEC_EAT_THIS_DETAIL_, __VA_ARGS__))
240240
#endif
241241

242+
// Configure extra type checking
//
// STDEXEC_TYPE_CHECKING_WHICH() evaluates to 0, 1 or 2 depending on how
// STDEXEC_ENABLE_EXTRA_TYPE_CHECKING is currently defined. The macro-expanded
// value of STDEXEC_ENABLE_EXTRA_TYPE_CHECKING is pasted onto
// STDEXEC_PROBE_TYPE_CHECKING_ and the resulting macro is invoked:
//   defined as empty -> STDEXEC_PROBE_TYPE_CHECKING_  -> 1
//   defined as 0     -> STDEXEC_PROBE_TYPE_CHECKING_0 -> 0
//   defined as 1     -> STDEXEC_PROBE_TYPE_CHECKING_1 -> 1
//   already a function-like macro (its bare name is not expanded, so the name
//   itself is pasted) -> 2
// Any other definition has no matching probe macro.
243+
#define STDEXEC_TYPE_CHECKING_ZERO() 0
244+
#define STDEXEC_TYPE_CHECKING_ONE() 1
245+
#define STDEXEC_TYPE_CHECKING_TWO() 2
246+
// Probe table: one entry per recognized definition (see the list above).
247+
#define STDEXEC_PROBE_TYPE_CHECKING_ STDEXEC_TYPE_CHECKING_ONE
248+
#define STDEXEC_PROBE_TYPE_CHECKING_0 STDEXEC_TYPE_CHECKING_ZERO
249+
#define STDEXEC_PROBE_TYPE_CHECKING_1 STDEXEC_TYPE_CHECKING_ONE
250+
#define STDEXEC_PROBE_TYPE_CHECKING_STDEXEC_ENABLE_EXTRA_TYPE_CHECKING STDEXEC_TYPE_CHECKING_TWO
251+
// WHICH2 exists so that its argument is macro-expanded before WHICH3 pastes it;
// operands of ## are not expanded.
252+
#define STDEXEC_TYPE_CHECKING_WHICH3(...) STDEXEC_PROBE_TYPE_CHECKING_ ## __VA_ARGS__
253+
#define STDEXEC_TYPE_CHECKING_WHICH2(...) STDEXEC_TYPE_CHECKING_WHICH3(__VA_ARGS__)
254+
#define STDEXEC_TYPE_CHECKING_WHICH STDEXEC_TYPE_CHECKING_WHICH2(STDEXEC_ENABLE_EXTRA_TYPE_CHECKING)
255+
242256
#ifndef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
243-
// Compile times are bad enough on nvhpc. Disable extra type checking by default.
244-
#if STDEXEC_NVHPC()
257+
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 0
258+
#elif STDEXEC_TYPE_CHECKING_WHICH() == 2
259+
// do nothing
260+
#elif STDEXEC_TYPE_CHECKING_WHICH() == 0
261+
#undef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
245262
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 0
246263
#else
264+
#undef STDEXEC_ENABLE_EXTRA_TYPE_CHECKING
247265
#define STDEXEC_ENABLE_EXTRA_TYPE_CHECKING() 1
248266
#endif
249-
#endif
250267

251268
namespace stdexec {
252269
}

0 commit comments

Comments
 (0)