PyTorchSim/PyTorchSimDevice/csrc/aten/OpenRegExtra.cpp at 3d9cb387b2ba27853efb983241fa4450c3174d9d · PSAL-POSTECH/PyTorchSim · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include "native/Extra.h"

#include <ATen/native/CPUFallback.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/native/transformers/attention.h>

#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>
#include <torch/library.h>

namespace at::openreg {

namespace {
// Thin adapter around at::native::openreg::quantize_per_tensor.
// Every argument is passed through untouched and the tensor produced by the
// native implementation is handed back to the caller.
at::Tensor wrapper_quantize_per_tensor(
    const at::Tensor& self,
    double scale,
    int64_t zero_point,
    at::ScalarType dtype) {
  at::Tensor quantized =
      at::native::openreg::quantize_per_tensor(self, scale, zero_point, dtype);
  return quantized;
}

// Thin adapter around at::native::openreg::_fused_sdp_choice.
// Forwards the attention inputs and options as-is and returns the integer the
// native implementation reports (presumably an SDP backend selector -- confirm
// against native/Extra.h).
int64_t wrapper__fused_sdp_choice(
    const at::Tensor& query,
    const at::Tensor& key,
    const at::Tensor& value,
    const std::optional<at::Tensor>& attn_mask,
    double dropout_p,
    bool is_causal,
    std::optional<double> scale,
    bool enable_gqa) {
  const int64_t choice = at::native::openreg::_fused_sdp_choice(
      query,
      key,
      value,
      attn_mask,
      dropout_p,
      is_causal,
      scale,
      enable_gqa);
  return choice;
}

// Thin adapter around
// at::native::openreg::quantize_tensor_per_tensor_affine_stub.
// `qtensor` is taken by mutable reference and forwarded as such; nothing is
// returned from this wrapper.
void wrapper_quantize_tensor_per_tensor_affine_stub(
    const at::Tensor& rtensor,
    at::Tensor& qtensor,
    double scale,
    int64_t zero_point) {
  at::native::openreg::quantize_tensor_per_tensor_affine_stub(
      rtensor,
      qtensor,
      scale,
      zero_point);
}

// Thin adapter around
// at::native::openreg::_scaled_dot_product_fused_attention_overrideable_backward.
// All seventeen arguments are forwarded in declaration order and the
// four-tensor tuple produced by the native implementation is returned.
std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
wrapper_scaled_dot_product_fused_attention_overrideable_backward(
    const at::Tensor& grad_out,
    const at::Tensor& query,
    const at::Tensor& key,
    const at::Tensor& value,
    const at::Tensor& attn_bias,
    std::array<bool, 4> grad_input_mask,
    const at::Tensor& out,
    const at::Tensor& logsumexp,
    const at::Tensor& cum_seq_q,
    const at::Tensor& cum_seq_k,
    int64_t max_q,
    int64_t max_k,
    double dropout_p,
    bool is_causal,
    const at::Tensor& philox_seed,
    const at::Tensor& philox_offset,
    std::optional<double> scale) {
  std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> grads =
      at::native::openreg::
          _scaled_dot_product_fused_attention_overrideable_backward(
              // Incoming gradient and the forward inputs.
              grad_out, query, key, value, attn_bias,
              grad_input_mask,
              // Tensors saved from the forward pass.
              out, logsumexp, cum_seq_q, cum_seq_k,
              max_q, max_k,
              // Attention options and RNG state.
              dropout_p, is_causal, philox_seed, philox_offset,
              scale);
  return grads;
}

// Thin adapter around at::native::openreg::custom_autograd_fn_returns_self.
// Takes the tensor by value, passes it on, and returns whatever the native
// implementation yields.
at::Tensor wrapper_custom_autograd_fn_returns_self(at::Tensor x) {
  at::Tensor result =
      at::native::openreg::custom_autograd_fn_returns_self(x);
  return result;
}

// Thin adapter around at::native::openreg::custom_autograd_fn_aliasing.
// Takes the tensor by value, passes it on, and returns whatever the native
// implementation yields.
at::Tensor wrapper_custom_autograd_fn_aliasing(at::Tensor x) {
  at::Tensor result = at::native::openreg::custom_autograd_fn_aliasing(x);
  return result;
}

// Thin adapter around at::native::openreg::abs_out.
// Forwards the input and the caller-provided output tensor, and returns the
// tensor reference produced by the native implementation.
at::Tensor& wrapper_abs_out(const at::Tensor& self, at::Tensor& out) {
  at::Tensor& result = at::native::openreg::abs_out(self, out);
  return result;
}

// Thin adapter around at::native::openreg::abs_kernel.
// Passes the tensor iterator straight through to the kernel; no value is
// returned.
void wrapper_abs_stub(
    at::TensorIteratorBase& iter) {
  at::native::openreg::abs_kernel(
      iter);
}

// Thin adapter around at::native::openreg::custom_abs.
// Takes the tensor by value, passes it on, and returns the tensor produced by
// the native implementation.
at::Tensor wrapper_custom_abs(at::Tensor x) {
  at::Tensor result = at::native::openreg::custom_abs(x);
  return result;
}
} // namespace

// Brings at::native into scope for the rest of this namespace; presumably this
// is what lets the stub names passed to REGISTER_PRIVATEUSE1_DISPATCH below
// (abs_stub, quantize_tensor_per_tensor_affine_stub, _fused_sdp_choice_stub)
// be written unqualified -- confirm against ATen/native/DispatchStub.h.
using namespace at::native;
// Registration via STUB
// Each REGISTER_PRIVATEUSE1_DISPATCH invocation pairs a dispatch stub with the
// address of one of the file-local wrapper functions defined in the anonymous
// namespace of this file.
// LITERALINCLUDE START: STUB DEFAULT
REGISTER_PRIVATEUSE1_DISPATCH(abs_stub, &wrapper_abs_stub);
REGISTER_PRIVATEUSE1_DISPATCH(
    quantize_tensor_per_tensor_affine_stub,
    &wrapper_quantize_tensor_per_tensor_affine_stub);
REGISTER_PRIVATEUSE1_DISPATCH(
    _fused_sdp_choice_stub,
    &wrapper__fused_sdp_choice);
// LITERALINCLUDE END: STUB DEFAULT

// Registration of custom operators
// Declares the `openreg` operator library and defines the schema of its
// `custom_abs` operator (one Tensor argument named `input`, one Tensor
// result). Only the schema is declared here; no kernel is attached in this
// block.
// LITERALINCLUDE START: CUSTOM OPERATOR SCHEMA
TORCH_LIBRARY(openreg, m) {
  m.def("custom_abs(Tensor input)-> Tensor");
}
// LITERALINCLUDE END: CUSTOM OPERATOR SCHEMA

// Attaches wrapper_custom_abs as the implementation of `custom_abs` in the
// `openreg` library for the PrivateUse1 dispatch key.
// LITERALINCLUDE START: CUSTOM OPERATOR DEFAULT
TORCH_LIBRARY_IMPL(openreg, PrivateUse1, m) {
  m.impl("custom_abs", &wrapper_custom_abs);
}
// LITERALINCLUDE END: CUSTOM OPERATOR DEFAULT

// Installs torch::autograd::autogradNotImplementedFallback() as the fallback
// for the AutogradPrivateUse1 dispatch key. The `_` namespace argument makes
// this a registration that is not tied to a single operator library.
// LITERALINCLUDE START: CUSTOM OPERATOR FALLBACK
TORCH_LIBRARY_IMPL(_, AutogradPrivateUse1, m) {
  m.fallback(torch::autograd::autogradNotImplementedFallback());
}
// LITERALINCLUDE END: CUSTOM OPERATOR FALLBACK

// The rest is for testing purposes
// Attaches the file-local wrappers to four `aten` operators for the
// PrivateUse1 dispatch key. Note that wrapper__fused_sdp_choice is registered
// here by operator name in addition to its _fused_sdp_choice_stub
// registration earlier in this file.
TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
  /*
   abs_stub only works if abs.out is also registered with PrivateUse1, because
   abs.default is designed to redirect directly to abs.out, which calls
   abs_stub.
  */
  m.impl("abs.out", &wrapper_abs_out);
  m.impl("quantize_per_tensor", &wrapper_quantize_per_tensor);
  m.impl("_fused_sdp_choice", &wrapper__fused_sdp_choice);
  m.impl(
      "_scaled_dot_product_fused_attention_overrideable_backward",
      &wrapper_scaled_dot_product_fused_attention_overrideable_backward);
}

// Adds two more operator schemas to the `openreg` library. A FRAGMENT is used
// because the library itself is already declared by TORCH_LIBRARY(openreg, m)
// earlier in this file.
TORCH_LIBRARY_FRAGMENT(openreg, m) {
  m.def("custom_autograd_fn_returns_self(Tensor input)-> Tensor");
  // The `(a)` annotation on both the argument and the result marks them as
  // belonging to the same alias set in the schema.
  m.def("custom_autograd_fn_aliasing(Tensor(a) input)-> Tensor(a)");
}

// Attaches the two custom autograd wrappers to their `openreg` operators for
// the AutogradPrivateUse1 dispatch key (rather than PrivateUse1, which is the
// key used for `custom_abs` in this file).
TORCH_LIBRARY_IMPL(openreg, AutogradPrivateUse1, m) {
  m.impl(
      "custom_autograd_fn_returns_self",
      &wrapper_custom_autograd_fn_returns_self);
  m.impl("custom_autograd_fn_aliasing", &wrapper_custom_autograd_fn_aliasing);
}

} // namespace at::openreg