[pre-commit.ci] auto code formatting

pre-commit-ci[bot] · pre-commit-ci[bot] · commit ef1540178849 · 2022-07-11T13:24:04.000Z
diff --git a/include/cuco/detail/reduction_functor_impl.cuh b/include/cuco/detail/reduction_functor_impl.cuh
@@ -29,46 +29,54 @@ namespace detail {
  * @warning This class should not be used directly.
  *
  */
-class reduction_functor_base {};
+class reduction_functor_base {
+};
 
 template <typename T, typename Enable = void>
 struct reduce_add_impl {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept
+  {
     return lhs.fetch_add(rhs) + rhs;
   }
 };
 
 template <typename T, typename Enable = void>
 struct reduce_min_impl {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept
+  {
     return min(lhs.fetch_min(rhs), rhs);
   }
 };
 
 template <typename T, typename Enable = void>
 struct reduce_max_impl {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept
+  {
     return max(lhs.fetch_max(rhs), rhs);
   }
 };
 
 template <typename T, typename Enable = void>
 struct reduce_count_impl {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& /* rhs */) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& /* rhs */) const noexcept
+  {
     return ++lhs;
   }
 };
 
 // remove the following WAR once libcu++ extends FP atomics support and fixes signed integer atomics
 // https://github.com/NVIDIA/libcudacxx/pull/286
 template <typename T>
-struct reduce_add_impl<T, typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
+struct reduce_add_impl<
+  T,
+  typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T rhs) const noexcept
+  {
     if constexpr (Scope == cuda::thread_scope_system)
       return atomicAdd_system(reinterpret_cast<T*>(&lhs), rhs) + rhs;
     else if constexpr (Scope == cuda::thread_scope_device)
@@ -79,11 +87,15 @@ struct reduce_add_impl<T, typename cuda::std::enable_if<cuda::std::is_floating_p
 };
 
 template <typename T>
-struct reduce_min_impl<T, typename cuda::std::enable_if<cuda::std::is_integral<T>::value && cuda::std::is_signed<T>::value && sizeof(T) == 8>::type> {
+struct reduce_min_impl<
+  T,
+  typename cuda::std::enable_if<cuda::std::is_integral<T>::value &&
+                                cuda::std::is_signed<T>::value && sizeof(T) == 8>::type> {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept
+  {
     using InternalT = typename cuda::std::conditional<sizeof(T) == 8, long long int, int>::type;
-    InternalT * ptr = reinterpret_cast<InternalT*>(&lhs);
+    InternalT* ptr  = reinterpret_cast<InternalT*>(&lhs);
     InternalT value = rhs;
     if constexpr (Scope == cuda::thread_scope_system)
       return min(atomicMin_system(ptr, value), value);
@@ -95,11 +107,14 @@ struct reduce_min_impl<T, typename cuda::std::enable_if<cuda::std::is_integral<T
 };
 
 template <typename T>
-struct reduce_max_impl<T, typename cuda::std::enable_if<cuda::std::is_integral<T>::value && cuda::std::is_signed<T>::value>::type> {
+struct reduce_max_impl<T,
+                       typename cuda::std::enable_if<cuda::std::is_integral<T>::value &&
+                                                     cuda::std::is_signed<T>::value>::type> {
   template <cuda::thread_scope Scope>
-  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept {
+  __device__ T operator()(cuda::atomic<T, Scope>& lhs, T const& rhs) const noexcept
+  {
     using InternalT = typename cuda::std::conditional<sizeof(T) == 8, long long int, int>::type;
-    InternalT * ptr = reinterpret_cast<InternalT*>(&lhs);
+    InternalT* ptr  = reinterpret_cast<InternalT*>(&lhs);
     InternalT value = rhs;
     if constexpr (Scope == cuda::thread_scope_system)
       return max(atomicMax_system(ptr, value), value);
@@ -111,18 +126,18 @@ struct reduce_max_impl<T, typename cuda::std::enable_if<cuda::std::is_integral<T
 };
 
 template <typename T>
-struct reduce_min_impl<T, typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
-  __device__ T operator()(T lhs, T rhs) const noexcept {
-    return min(lhs, rhs);
-  }
+struct reduce_min_impl<
+  T,
+  typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
+  __device__ T operator()(T lhs, T rhs) const noexcept { return min(lhs, rhs); }
 };
 
 template <typename T>
-struct reduce_max_impl<T, typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
-  __device__ T operator()(T lhs, T rhs) const noexcept {
-    return max(lhs, rhs);
-  }
+struct reduce_max_impl<
+  T,
+  typename cuda::std::enable_if<cuda::std::is_floating_point<T>::value>::type> {
+  __device__ T operator()(T lhs, T rhs) const noexcept { return max(lhs, rhs); }
 };
 
-} // namespace detail
-} // namespace cuco
+}  // namespace detail
+}  // namespace cuco
diff --git a/include/cuco/reduction_functors.cuh b/include/cuco/reduction_functors.cuh
@@ -36,6 +36,7 @@ class identity_value {
   using type = T;
   constexpr identity_value(T const& identity) noexcept : identity_(identity) {}
   constexpr T value() const noexcept { return identity_; }
+
  private:
   T identity_;
 };
@@ -64,13 +65,16 @@ class identity_value {
  * };
  *
  * int main() {
- *   cuco::identity_value<int> identity{0}; // define the identity value for the given reduction operation, i.e., op(identity, x) == x
+ *   // define the identity value for the given reduction operation, i.e., op(identity, x) == x
+ *   cuco::identity_value<int> identity{0};
  *
- *   auto f1 = cuco::reduction_functor<custom_plus<int>, int>(identity); // synchronized via CAS-loop
- *   auto f2 = cuco::reduction_functor<custom_plus_sync<int>, int>(identity); // implicitly synchronized
+ *   // f1 is synchronized via CAS-loop; f2 is implicitly synchronized
+ *   auto f1 = cuco::reduction_functor<custom_plus<int>, int>(identity);
+ *   auto f2 = cuco::reduction_functor<custom_plus_sync<int>, int>(identity);
  *
  *   auto custom_plus_lambda = [] __device__ (int lhs, int rhs) noexcept { return lhs + rhs; };
- *   auto f3 = cuco::reduction_functor<decltype(custom_plus_lambda), int>(identity, custom_plus_lambda);
+ *   auto f3 =
+ *     cuco::reduction_functor<decltype(custom_plus_lambda), int>(identity, custom_plus_lambda);
  * }
  * \endcode
  *
@@ -82,42 +86,59 @@ class reduction_functor : detail::reduction_functor_base {
  public:
   using value_type = Value;
 
-  reduction_functor(cuco::identity_value<Value> identity, Func functor = Func{}) noexcept : identity_(identity), functor_(functor) {}
+  reduction_functor(cuco::identity_value<Value> identity, Func functor = Func{}) noexcept
+    : identity_(identity), functor_(functor)
+  {
+  }
 
   template <cuda::thread_scope Scope>
-  __device__ value_type operator()(cuda::atomic<value_type, Scope>& lhs, value_type const& rhs) const noexcept
+  __device__ value_type operator()(cuda::atomic<value_type, Scope>& lhs,
+                                   value_type const& rhs) const noexcept
   {
     if constexpr (uses_external_sync()) {
       value_type old = lhs.load(cuda::memory_order_relaxed);
       value_type desired;
 
       do {
         desired = functor_(old, rhs);
-      } while (!lhs.compare_exchange_weak(old, desired, cuda::memory_order_release, cuda::memory_order_relaxed));
+      } while (!lhs.compare_exchange_weak(
+        old, desired, cuda::memory_order_release, cuda::memory_order_relaxed));
 
       return desired;
     } else {
       return functor_(lhs, rhs);
     }
   }
 
-  __host__ __device__ value_type identity() const noexcept {
-    return identity_.value();
-  }
+  __host__ __device__ value_type identity() const noexcept { return identity_.value(); }
 
-  __host__ __device__ static constexpr bool uses_external_sync() noexcept {
+  __host__ __device__ static constexpr bool uses_external_sync() noexcept
+  {
     return !atomic_invocable_ || naive_invocable_;
   }
 
  private:
   cuco::identity_value<value_type> identity_;
   Func functor_;
-  static constexpr bool naive_invocable_ = std::is_invocable_r<value_type, Func, value_type, value_type>::value;
+  static constexpr bool naive_invocable_ =
+    std::is_invocable_r<value_type, Func, value_type, value_type>::value;
   static constexpr bool atomic_invocable_ =
-    std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_system>&, value_type>::value ||
-    std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_device>&, value_type>::value ||
-    std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_block>&,  value_type>::value ||
-    std::is_invocable_r<value_type, Func, cuda::atomic<value_type, cuda::thread_scope_thread>&, value_type>::value;
+    std::is_invocable_r<value_type,
+                        Func,
+                        cuda::atomic<value_type, cuda::thread_scope_system>&,
+                        value_type>::value ||
+    std::is_invocable_r<value_type,
+                        Func,
+                        cuda::atomic<value_type, cuda::thread_scope_device>&,
+                        value_type>::value ||
+    std::is_invocable_r<value_type,
+                        Func,
+                        cuda::atomic<value_type, cuda::thread_scope_block>&,
+                        value_type>::value ||
+    std::is_invocable_r<value_type,
+                        Func,
+                        cuda::atomic<value_type, cuda::thread_scope_thread>&,
+                        value_type>::value;
 
   static_assert(atomic_invocable_ || naive_invocable_, "Invalid operator signature.");
 };
@@ -128,30 +149,44 @@ class reduction_functor : detail::reduction_functor_base {
  * @tparam T The value type used for reduction
  */
 template <typename T>
-auto reduce_add() { return reduction_functor(identity_value<T>{0}, detail::reduce_add_impl<T>{}); };
+auto reduce_add()
+{
+  return reduction_functor(identity_value<T>{0}, detail::reduce_add_impl<T>{});
+};
 
 /**
  * @brief Synchronized `min` reduction functor.
  *
  * @tparam T The value type used for reduction
  */
 template <typename T>
-auto reduce_min() { return reduction_functor(identity_value{cuda::std::numeric_limits<T>::max()}, detail::reduce_min_impl<T>{}); };
+auto reduce_min()
+{
+  return reduction_functor(identity_value{cuda::std::numeric_limits<T>::max()},
+                           detail::reduce_min_impl<T>{});
+};
 
 /**
  * @brief Synchronized `max` reduction functor.
  *
  * @tparam T The value type used for reduction
  */
 template <typename T>
-auto reduce_max() { return reduction_functor(identity_value{cuda::std::numeric_limits<T>::lowest()}, detail::reduce_max_impl<T>{}); };
+auto reduce_max()
+{
+  return reduction_functor(identity_value{cuda::std::numeric_limits<T>::lowest()},
+                           detail::reduce_max_impl<T>{});
+};
 
 /**
  * @brief Synchronized `count` reduction functor.
  *
  * @tparam T The value type used for reduction
  */
 template <typename T>
-auto reduce_count() { return reduction_functor(identity_value<T>{0}, detail::reduce_count_impl<T>{}); };
+auto reduce_count()
+{
+  return reduction_functor(identity_value<T>{0}, detail::reduce_count_impl<T>{});
+};
 
 }  // namespace cuco
diff --git a/tests/static_reduction_map/reduction_functors_test.cu b/tests/static_reduction_map/reduction_functors_test.cu