-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsrcAttentionPlugin.h
More file actions
133 lines (108 loc) · 4.45 KB
/
srcAttentionPlugin.h
File metadata and controls
133 lines (108 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SRC_ATTENTION_PLUGIN_H
#define TRT_SRC_ATTENTION_PLUGIN_H
#include <string>
#include <vector>

#include <cublas_v2.h>
#include <cuda_fp16.h>
#include <cudnn.h>

#include "plugin.h"
namespace nvinfer1
{
namespace plugin
{
class SrcAttention : public IPluginV2DynamicExt
{
public:
SrcAttention(const Weights kweight, const Weights kbias,
const Weights vweight,const Weights vbias,
const int nhead,const int nfeat,DataType type);
SrcAttention(const SrcAttention&);
SrcAttention(const void* buffer, size_t length);
~SrcAttention() override = default;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt* clone() const override;
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs* inputs,
int nbInputs, nvinfer1::IExprBuilder& exprBuilder) override;
bool supportsFormatCombination(
int pos, const nvinfer1::PluginTensorDesc* inOut,
int nbInputs, int nbOutputs) override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) override;
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const override;
int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs,
void* workspace, cudaStream_t stream) override;
// IPluginV2Ext Methods
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
// IPluginV2 Methods
const char* getPluginVersion() const override;
const char* getPluginType() const override;
int getNbOutputs() const override;
int initialize() override;
void terminate() override;
void destroy() override;
void serialize(void* buffer) const override;
size_t getSerializationSize() const override;
const char* getPluginNamespace() const override;
void setPluginNamespace(const char* pluginNamespace) override;
private:
size_t type2size(DataType type)const {
if (type == DataType::kFLOAT || type == DataType::kINT32)
return 4;
else if (type == DataType::kHALF) {
return 2;
}
else return 1;
}
std::string mNameSpace{ "" };
int maxLen{ 0 }, maxSeql{ 0 };
int n_Head, n_Feat;
DataType ctype;
void *K, *V;
void *Score, *Score_;
void *k_Weight, *k_Bias;
void *v_Weight, *v_Bias;
//cubals cudnn
void **A[2], **B[2], **C[2], **gA, **gB, **gC;
float alpha[1] = { 1.0 }, belta[1] = { 0.0 };
half halpha[1] = { 1.0 }, hbelta[1] = { 0.0 };
cudnnHandle_t dnnHandle;
cublasHandle_t blasHandle;
cudnnTensorDescriptor_t kvdes, bdes, xdes;
cudnnSoftmaxAlgorithm_t algo_t{ CUDNN_SOFTMAX_ACCURATE };
cudnnSoftmaxMode_t mode_t{ CUDNN_SOFTMAX_MODE_CHANNEL };
};
/// Plugin-creator declaration derived from BaseCreator.
///
/// Every member function is defined outside this header.
/// NOTE(review): presumably the factory registered for SrcAttention -- confirm
/// against the implementation file.
class SrcAttentionPluginCreator : public BaseCreator
{
public:
    SrcAttentionPluginCreator();
    ~SrcAttentionPluginCreator() override;

    // Name and version strings reported by this creator.
    const char* getPluginName() const override;
    const char* getPluginVersion() const override;

    // Collection of plugin fields exposed by this creator.
    const PluginFieldCollection* getFieldNames() override;

    // Factory entry points: one takes a field collection, the other a
    // serialized byte buffer and its length.
    IPluginV2DynamicExt* createPlugin(
        const char* name, const PluginFieldCollection* fc) override;
    IPluginV2DynamicExt* deserializePlugin(
        const char* name, const void* serialData, size_t serialLength) override;

private:
    // Shared across all creator instances (static storage).
    static PluginFieldCollection mFC;
    static std::vector<PluginField> mPluginAttributes;
};
} // namespace plugin
} // namespace nvinfer1
#endif // TRT_SRC_ATTENTION_PLUGIN_H