Skip to content

Commit fc00a6d

Browse files
authored
Merge branch 'main' into patch-3
2 parents 88ccf54 + 6405370 commit fc00a6d

File tree

19 files changed

+524
-183
lines changed

19 files changed

+524
-183
lines changed

CMakeLists.txt

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -427,18 +427,22 @@ if (STDEXEC_ENABLE_NUMA)
427427
target_compile_definitions(stdexec INTERFACE STDEXEC_ENABLE_NUMA)
428428
endif()
429429

430-
set(SYSTEM_CONTEXT_SOURCES src/system_context/system_context.cpp)
431-
add_library(system_context ${SYSTEM_CONTEXT_SOURCES})
432-
target_compile_features(system_context PUBLIC cxx_std_20)
433-
set_target_properties(system_context PROPERTIES
434-
CXX_STANDARD 20
435-
CXX_STANDARD_REQUIRED ON
436-
CXX_EXTENSIONS OFF)
437-
target_compile_options(system_context PUBLIC
438-
$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/Zc:__cplusplus /Zc:preprocessor /Zc:externConstexpr>
439-
)
440-
add_library(STDEXEC::system_context ALIAS system_context)
441-
target_link_libraries(system_context PUBLIC stdexec)
430+
option(STDEXEC_BUILD_SYSTEM_CONTEXT "Build the system_context compiled library" OFF)
431+
432+
if(STDEXEC_BUILD_SYSTEM_CONTEXT)
433+
set(SYSTEM_CONTEXT_SOURCES src/system_context/system_context.cpp)
434+
add_library(system_context ${SYSTEM_CONTEXT_SOURCES})
435+
target_compile_features(system_context PUBLIC cxx_std_20)
436+
set_target_properties(system_context PROPERTIES
437+
CXX_STANDARD 20
438+
CXX_STANDARD_REQUIRED ON
439+
CXX_EXTENSIONS OFF)
440+
target_compile_options(system_context PUBLIC
441+
$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/Zc:__cplusplus /Zc:preprocessor /Zc:externConstexpr>
442+
)
443+
add_library(STDEXEC::system_context ALIAS system_context)
444+
target_link_libraries(system_context PUBLIC stdexec)
445+
endif()
442446

443447

444448
option(STDEXEC_ENABLE_IO_URING "Enable the use of the io_uring scheduler on Linux" OFF)
@@ -504,8 +508,13 @@ endif()
504508
if(STDEXEC_INSTALL)
505509
include(CPack)
506510

507-
install(TARGETS stdexec system_context
508-
EXPORT stdexec-exports
511+
set(stdexec_install_targets stdexec)
512+
if(STDEXEC_BUILD_SYSTEM_CONTEXT)
513+
list(APPEND stdexec_install_targets system_context)
514+
endif()
515+
516+
install(TARGETS ${stdexec_install_targets}
517+
EXPORT stdexec-exports
509518
FILE_SET headers
510519
FILE_SET version_config)
511520

conanfile.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,12 @@ class StdexecPackage(ConanFile):
1414
license = "Apache 2.0"
1515

1616
settings = "os", "arch", "compiler", "build_type"
17+
options = {
18+
"system_context": [True, False],
19+
}
20+
default_options = {
21+
"system_context": False,
22+
}
1723
exports_sources = (
1824
"include/*",
1925
"src/*",
@@ -24,6 +30,12 @@ class StdexecPackage(ConanFile):
2430
)
2531
generators = "CMakeToolchain"
2632

33+
def configure(self):
34+
if self.options.system_context:
35+
self.package_type = "static-library"
36+
else:
37+
self.package_type = "header-library"
38+
2739
def validate(self):
2840
check_min_cppstd(self, "20")
2941

@@ -37,18 +49,21 @@ def layout(self):
3749

3850
def build(self):
3951
tests = "OFF" if self.conf.get("tools.build:skip_test", default=False) else "ON"
52+
system_context = "ON" if self.options.system_context else "OFF"
4053

4154
cmake = CMake(self)
4255
cmake.configure(variables={
4356
"STDEXEC_BUILD_TESTS": tests,
4457
"STDEXEC_BUILD_EXAMPLES": tests,
58+
"STDEXEC_BUILD_SYSTEM_CONTEXT": system_context,
4559
})
4660
cmake.build()
4761
cmake.test()
4862

4963
def package_id(self):
50-
# Clear settings because this package is header-only.
51-
self.info.clear()
64+
if not self.info.options.system_context:
65+
# Clear settings because this package is header-only.
66+
self.info.clear()
5267

5368
def package(self):
5469
cmake = CMake(self)
@@ -58,4 +73,8 @@ def package_info(self):
5873
self.cpp_info.set_property("cmake_file_name", "P2300")
5974
self.cpp_info.set_property("cmake_target_name", "P2300::P2300")
6075
self.cpp_info.set_property("cmake_target_aliases", ["STDEXEC::stdexec"])
61-
self.cpp_info.libs = ["system_context"]
76+
if self.options.system_context:
77+
self.cpp_info.components["system_context"].libs = ["system_context"]
78+
self.cpp_info.components["system_context"].set_property(
79+
"cmake_target_name", "STDEXEC::system_context"
80+
)

include/nvexec/stream/common.cuh

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -532,28 +532,21 @@ namespace nv::execution
532532
{}
533533

534534
template <class... Args>
535-
STDEXEC_ATTRIBUTE(host, device)
535+
STDEXEC_ATTRIBUTE(device)
536536
void set_value(Args&&... args) noexcept
537537
{
538538
using tuple_t = decayed_tuple_t<set_value_t, Args...>;
539539
variant_->template emplace<tuple_t>(set_value_t(), static_cast<Args&&>(args)...);
540540
producer_(task_);
541541
}
542542

543-
STDEXEC_ATTRIBUTE(host, device) void set_stopped() noexcept
544-
{
545-
using tuple_t = decayed_tuple_t<set_stopped_t>;
546-
variant_->template emplace<tuple_t>(set_stopped_t());
547-
producer_(task_);
548-
}
549-
550543
template <class Error>
551-
STDEXEC_ATTRIBUTE(host, device)
544+
STDEXEC_ATTRIBUTE(device)
552545
void set_error(Error&& err) noexcept
553546
{
554547
if constexpr (__decays_to<Error, std::exception_ptr>)
555548
{
556-
// What is `exception_ptr` but death pending
549+
// What is `exception_ptr` but death pending?
557550
using tuple_t = decayed_tuple_t<set_error_t, cudaError_t>;
558551
variant_->template emplace<tuple_t>(STDEXEC::set_error, cudaErrorUnknown);
559552
}
@@ -565,6 +558,15 @@ namespace nv::execution
565558
producer_(task_);
566559
}
567560

561+
STDEXEC_ATTRIBUTE(device)
562+
void set_stopped() noexcept
563+
{
564+
using tuple_t = decayed_tuple_t<set_stopped_t>;
565+
variant_->template emplace<tuple_t>(set_stopped_t());
566+
producer_(task_);
567+
}
568+
569+
[[nodiscard]]
568570
auto get_env() const noexcept -> Env const &
569571
{
570572
return *env_;

include/nvexec/stream/repeat_n.cuh

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -68,13 +68,8 @@ namespace nv::execution::_strm
6868
{
6969
using operation_state_concept = STDEXEC::operation_state_t;
7070

71-
using scheduler_t =
72-
STDEXEC::__result_of<STDEXEC::get_completion_scheduler<STDEXEC::set_value_t>,
73-
STDEXEC::env_of_t<Sender>,
74-
STDEXEC::env_of_t<Receiver>>;
75-
76-
using inner_sender_t =
77-
STDEXEC::__result_of<exec::sequence, STDEXEC::schedule_result_t<scheduler_t&>, Sender&>;
71+
using scheduler_t = __completion_scheduler_of_t<set_value_t, Sender, env_of_t<Receiver>>;
72+
using inner_sender_t = STDEXEC::__result_of<STDEXEC::starts_on, scheduler_t, Sender&>;
7873
using inner_opstate_t = STDEXEC::connect_result_t<inner_sender_t, receiver<opstate>>;
7974

8075
explicit opstate(Sender&& sndr, Receiver rcvr, std::size_t count, scheduler_t sched)
@@ -91,9 +86,9 @@ namespace nv::execution::_strm
9186

9287
auto& _connect()
9388
{
94-
inner_opstate_.__emplace_from(STDEXEC::connect,
95-
exec::sequence(STDEXEC::schedule(sched_), sndr_),
96-
receiver{*this});
89+
return inner_opstate_.__emplace_from(STDEXEC::connect,
90+
STDEXEC::starts_on(sched_, sndr_),
91+
receiver{*this});
9792
}
9893

9994
template <class Tag, class... Args>
include/nvexec/stream/starts_on.cuh

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright (c) 2026 NVIDIA Corporation
3+
*
4+
* Licensed under the Apache License Version 2.0 with LLVM Exceptions
5+
* (the "License"); you may not use this file except in compliance with
6+
* the License. You may obtain a copy of the License at
7+
*
8+
* https://llvm.org/LICENSE.txt
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
// clang-format Language: Cpp
17+
18+
#pragma once
19+
20+
#include "../../stdexec/execution.hpp"
21+
#include "../../stdexec/functional.hpp"
22+
23+
#include "let_xxx.cuh"
24+
25+
#include "common.cuh"
26+
27+
namespace nv::execution::_strm
28+
{
29+
template <>
30+
struct transform_sender_for<STDEXEC::starts_on_t>
31+
{
32+
template <class Env, STDEXEC::scheduler Scheduler, STDEXEC::sender Sender>
33+
auto operator()(Env const &, STDEXEC::starts_on_t, Scheduler&& sched, Sender&& sndr) const
34+
{
35+
return STDEXEC::let_value(STDEXEC::schedule(sched),
36+
STDEXEC::__always(static_cast<Sender&&>(sndr)));
37+
}
38+
};
39+
} // namespace nv::execution::_strm

include/nvexec/stream/then.cuh

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -71,10 +71,9 @@ namespace nv::execution::_strm
7171
requires std::invocable<Fun, __decay_t<Args>...>
7272
void set_value(Args&&... args) noexcept
7373
{
74-
using result_t = std::invoke_result_t<Fun, __decay_t<Args>...>;
75-
constexpr bool does_not_return_a_value = std::is_same_v<void, result_t>;
76-
_strm::opstate_base<Receiver>& opstate = opstate_;
77-
cudaStream_t stream = opstate.get_stream();
74+
using result_t = std::invoke_result_t<Fun, __decay_t<Args>...>;
75+
constexpr bool does_not_return_a_value = std::is_same_v<void, result_t>;
76+
cudaStream_t stream = opstate_.get_stream();
7877

7978
if constexpr (does_not_return_a_value)
8079
{
@@ -83,29 +82,29 @@ namespace nv::execution::_strm
8382
if (cudaError_t status = STDEXEC_LOG_CUDA_API(cudaPeekAtLastError());
8483
status == cudaSuccess)
8584
{
86-
opstate.propagate_completion_signal(STDEXEC::set_value);
85+
opstate_.propagate_completion_signal(STDEXEC::set_value);
8786
}
8887
else
8988
{
90-
opstate.propagate_completion_signal(STDEXEC::set_error, std::move(status));
89+
opstate_.propagate_completion_signal(STDEXEC::set_error, std::move(status));
9190
}
9291
}
9392
else
9493
{
9594
using decayed_result_t = __decay_t<result_t>;
96-
auto* d_result = static_cast<decayed_result_t*>(opstate.temp_storage_);
95+
auto* d_result = static_cast<decayed_result_t*>(opstate_.temp_storage_);
9796
_then_kernel_with_result<Args&&...>
9897
<<<1, 1, 0, stream>>>(std::move(f_), d_result, static_cast<Args&&>(args)...);
99-
opstate.defer_temp_storage_destruction(d_result);
98+
opstate_.defer_temp_storage_destruction(d_result);
10099

101100
if (cudaError_t status = STDEXEC_LOG_CUDA_API(cudaPeekAtLastError());
102101
status == cudaSuccess)
103102
{
104-
opstate.propagate_completion_signal(STDEXEC::set_value, std::move(*d_result));
103+
opstate_.propagate_completion_signal(STDEXEC::set_value, std::move(*d_result));
105104
}
106105
else
107106
{
108-
opstate.propagate_completion_signal(STDEXEC::set_error, std::move(status));
107+
opstate_.propagate_completion_signal(STDEXEC::set_error, std::move(status));
109108
}
110109
}
111110
}
@@ -185,7 +184,7 @@ namespace nv::execution::_strm
185184
static_cast<Self&&>(self).sndr_,
186185
static_cast<Receiver&&>(rcvr),
187186
[&](_strm::opstate_base<Receiver>& stream_provider) -> receiver_t<Receiver>
188-
{ return receiver_t<Receiver>(self.fun_, stream_provider); });
187+
{ return receiver_t<Receiver>(static_cast<Self&&>(self).fun_, stream_provider); });
189188
}
190189
STDEXEC_EXPLICIT_THIS_END(connect)
191190

@@ -209,11 +208,11 @@ namespace nv::execution::_strm
209208
struct transform_sender_for<STDEXEC::then_t>
210209
{
211210
template <class Env, class Fn, class CvSender>
212-
auto operator()(Env const &, __ignore, Fn fun, CvSender&& sndr) const
211+
auto operator()(Env const &, __ignore, Fn&& fun, CvSender&& sndr) const
213212
{
214213
if constexpr (stream_completing_sender<CvSender, Env>)
215214
{
216-
using _sender_t = then_sender<__decay_t<CvSender>, Fn>;
215+
using _sender_t = then_sender<__decay_t<CvSender>, __decay_t<Fn>>;
217216
return _sender_t{static_cast<CvSender&&>(sndr), static_cast<Fn&&>(fun)};
218217
}
219218
else

include/nvexec/stream_context.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include "stream/schedule_from.cuh" // IWYU pragma: export
3636
#include "stream/split.cuh" // IWYU pragma: export
3737
#include "stream/start_detached.cuh" // IWYU pragma: export
38+
#include "stream/starts_on.cuh" // IWYU pragma: export
3839
#include "stream/sync_wait.cuh" // IWYU pragma: export
3940
#include "stream/then.cuh" // IWYU pragma: export
4041
#include "stream/upon_error.cuh" // IWYU pragma: export

include/stdexec/__detail/__bulk.hpp

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,24 @@ namespace STDEXEC
242242
template <class _Fun>
243243
STDEXEC_HOST_DEVICE_DEDUCTION_GUIDE __as_bulk_chunked_fn(_Fun) -> __as_bulk_chunked_fn<_Fun>;
244244

245+
template <class _Child>
246+
struct __attrs : env<__fwd_env_t<env_of_t<_Child>>>
247+
{
248+
using __base_t = env<__fwd_env_t<env_of_t<_Child>>>;
249+
using __base_t::query;
250+
251+
constexpr explicit __attrs(_Child const & __child) noexcept
252+
: __base_t{__fwd_env(STDEXEC::get_env(__child))}
253+
{}
254+
255+
template <class... _Env>
256+
STDEXEC_ATTRIBUTE(nodiscard, always_inline, host, device)
257+
constexpr auto query(__get_completion_behavior_t<set_value_t>, _Env&&...) const noexcept
258+
{
259+
return STDEXEC::__get_completion_behavior<set_value_t, _Child, _Env...>();
260+
}
261+
};
262+
245263
template <class _AlgoTag>
246264
struct __impl_base : __sexpr_defaults
247265
{
@@ -252,9 +270,10 @@ namespace STDEXEC
252270
using __shape_t = decltype(__decay_t<__data_of<_Sender>>::__shape_);
253271

254272
// Forward the child sender's environment (which contains completion scheduler)
255-
static constexpr auto __get_attrs = [](__ignore, __ignore, auto const & __child) noexcept
273+
static constexpr auto __get_attrs = //
274+
[]<class _Child>(__ignore, __ignore, _Child const & __child) noexcept
256275
{
257-
return __fwd_env(STDEXEC::get_env(__child));
276+
return __attrs{__child};
258277
};
259278

260279
template <class _Sender, class... _Env>

0 commit comments

Comments
 (0)