@@ -36,6 +36,7 @@ class identity_value {
3636 using type = T;
3737 constexpr identity_value (T const & identity) noexcept : identity_(identity) {}
3838 constexpr T value () const noexcept { return identity_; }
39+
3940 private:
4041 T identity_;
4142};
@@ -64,13 +65,16 @@ class identity_value {
6465 * };
6566 *
6667 * int main() {
 *   // define the identity value for the given reduction operation,
 *   // i.e., op(identity, x) == x
 *   cuco::identity_value<int> identity{0};
6870 *
 *   // synchronized via CAS-loop
 *   auto f1 = cuco::reduction_functor<custom_plus<int>, int>(identity);
 *   // implicitly synchronized
 *   auto f2 = cuco::reduction_functor<custom_plus_sync<int>, int>(identity);
7174 *
7275 * auto custom_plus_lambda = [] __device__ (int lhs, int rhs) noexcept { return lhs + rhs; };
73- * auto f3 = cuco::reduction_functor<decltype(custom_plus_lambda), int>(identity, custom_plus_lambda);
76+ * auto f3 = cuco::reduction_functor<decltype(custom_plus_lambda), int>(identity,
77+ * custom_plus_lambda);
7478 * }
7579 * \endcode
7680 *
@@ -82,42 +86,59 @@ class reduction_functor : detail::reduction_functor_base {
8286 public:
8387 using value_type = Value;
8488
85- reduction_functor (cuco::identity_value<Value> identity, Func functor = Func{}) noexcept : identity_(identity), functor_(functor) {}
89+ reduction_functor (cuco::identity_value<Value> identity, Func functor = Func{}) noexcept
90+ : identity_(identity), functor_(functor)
91+ {
92+ }
8693
8794 template <cuda::thread_scope Scope>
88- __device__ value_type operator ()(cuda::atomic<value_type, Scope>& lhs, value_type const & rhs) const noexcept
95+ __device__ value_type operator ()(cuda::atomic<value_type, Scope>& lhs,
96+ value_type const & rhs) const noexcept
8997 {
9098 if constexpr (uses_external_sync ()) {
9199 value_type old = lhs.load (cuda::memory_order_relaxed);
92100 value_type desired;
93101
94102 do {
95103 desired = functor_ (old, rhs);
96- } while (!lhs.compare_exchange_weak (old, desired, cuda::memory_order_release, cuda::memory_order_relaxed));
104+ } while (!lhs.compare_exchange_weak (
105+ old, desired, cuda::memory_order_release, cuda::memory_order_relaxed));
97106
98107 return desired;
99108 } else {
100109 return functor_ (lhs, rhs);
101110 }
102111 }
103112
104- __host__ __device__ value_type identity () const noexcept {
105- return identity_.value ();
106- }
113+ __host__ __device__ value_type identity () const noexcept { return identity_.value (); }
107114
108- __host__ __device__ static constexpr bool uses_external_sync () noexcept {
115+ __host__ __device__ static constexpr bool uses_external_sync () noexcept
116+ {
109117 return !atomic_invocable_ || naive_invocable_;
110118 }
111119
112120 private:
113121 cuco::identity_value<value_type> identity_;
114122 Func functor_;
115- static constexpr bool naive_invocable_ = std::is_invocable_r<value_type, Func, value_type, value_type>::value;
123+ static constexpr bool naive_invocable_ =
124+ std::is_invocable_r<value_type, Func, value_type, value_type>::value;
116125 static constexpr bool atomic_invocable_ =
117- std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_system>&, value_type>::value ||
118- std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_device>&, value_type>::value ||
119- std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_block>&, value_type>::value ||
120- std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_thread>&, value_type>::value;
126+ std::is_invocable_r<value_type,
127+ Func,
128+ cuda::atomic<value_type, cuda::thread_scope_system>&,
129+ value_type>::value ||
130+ std::is_invocable_r<value_type,
131+ Func,
132+ cuda::atomic<value_type, cuda::thread_scope_device>&,
133+ value_type>::value ||
134+ std::is_invocable_r<value_type,
135+ Func,
136+ cuda::atomic<value_type, cuda::thread_scope_block>&,
137+ value_type>::value ||
138+ std::is_invocable_r<value_type,
139+ Func,
140+ cuda::atomic<value_type, cuda::thread_scope_thread>&,
141+ value_type>::value;
121142
122143 static_assert (atomic_invocable_ || naive_invocable_, " Invalid operator signature." );
123144};
@@ -128,30 +149,44 @@ class reduction_functor : detail::reduction_functor_base {
128149 * @tparam T The value type used for reduction
129150 */
130151template <typename T>
131- auto reduce_add () { return reduction_functor (identity_value<T>{0 }, detail::reduce_add_impl<T>{}); };
152+ auto reduce_add ()
153+ {
154+ return reduction_functor (identity_value<T>{0 }, detail::reduce_add_impl<T>{});
155+ };
132156
133157/* *
134158 * @brief Synchronized `min` reduction functor.
135159 *
136160 * @tparam T The value type used for reduction
137161 */
138162template <typename T>
139- auto reduce_min () { return reduction_functor (identity_value{cuda::std::numeric_limits<T>::max ()}, detail::reduce_min_impl<T>{}); };
163+ auto reduce_min ()
164+ {
165+ return reduction_functor (identity_value{cuda::std::numeric_limits<T>::max ()},
166+ detail::reduce_min_impl<T>{});
167+ };
140168
141169/* *
142170 * @brief Synchronized `max` reduction functor.
143171 *
144172 * @tparam T The value type used for reduction
145173 */
146174template <typename T>
147- auto reduce_max () { return reduction_functor (identity_value{cuda::std::numeric_limits<T>::lowest ()}, detail::reduce_max_impl<T>{}); };
175+ auto reduce_max ()
176+ {
177+ return reduction_functor (identity_value{cuda::std::numeric_limits<T>::lowest ()},
178+ detail::reduce_max_impl<T>{});
179+ };
148180
149181/* *
150182 * @brief Synchronized `count` reduction functor.
151183 *
152184 * @tparam T The value type used for reduction
153185 */
154186template <typename T>
155- auto reduce_count () { return reduction_functor (identity_value<T>{0 }, detail::reduce_count_impl<T>{}); };
187+ auto reduce_count ()
188+ {
189+ return reduction_functor (identity_value<T>{0 }, detail::reduce_count_impl<T>{});
190+ };
156191
157192} // namespace cuco
0 commit comments