-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtensor.py
More file actions
112 lines (83 loc) · 2.96 KB
/
tensor.py
File metadata and controls
112 lines (83 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from typing import Tuple
import dataclasses as dc
import numpy as np
import compute_graph
import tile
import vector
@dc.dataclass
class Tensor(compute_graph.Data):
    """A memory buffer stored in DRAM.

    Attributes:
        shape: vector op whose computed value is this tensor's dimensions.
        dtype: element type of the buffer.
    """

    shape: vector.VectorOp
    dtype: type

    def set_name(self, num_allocations: int) -> None:
        """Assign a unique name derived from the allocation counter."""
        self.name = f"t{num_allocations}"

    def referenced_ops(self) -> Tuple[compute_graph.Operator]:
        """Return the graph ops this tensor depends on: just its shape op."""
        return (self.shape,)
@dc.dataclass
class TensorOp(compute_graph.Operator):
    # Marker base class for graph operators whose output is a Tensor.
    pass
class Allocate(TensorOp):
    """Operator that allocates a new, unnamed DRAM tensor."""

    def __init__(self, shape: vector.VectorOp, dtype: np.dtype):
        # __init__ must not return a value; the original
        # `return super().__init__(...)` only worked because __init__
        # returns None.
        super().__init__(
            out=Tensor("<unnamed>", shape, dtype),
            input_ops=(shape,),
        )

    def extra_args(self) -> str:
        """Render the dtype for graph printing, e.g. "dtype=float32"."""
        # Single f-string instead of the original no-placeholder
        # f-string + concatenation.
        return f"dtype={np.dtype(self.out.dtype).name}"
class Return(TensorOp):
    """Marks the output tensor of `t` as a result of the graph."""

    def __init__(self, t: TensorOp):
        # No `return` here: __init__ must return None (the original's
        # `return super().__init__(...)` was a latent anti-pattern).
        super().__init__(out=t.out, input_ops=(t,))
class Add(TensorOp):
    """Elementwise addition of two tensors into a freshly allocated output."""

    def __init__(self, in_a: TensorOp, in_b: TensorOp):
        # Output takes shape/dtype from the first operand; assumes both
        # inputs share a shape -- TODO confirm upstream validation.
        new_t = Allocate(in_a.out.shape, in_a.out.dtype)
        super().__init__(out=new_t.out, input_ops=(in_a, in_b, new_t))

    def generate(self) -> tile.TileOp:
        """Lower to a tile-level scan applying tile.Add over the operands."""
        # Scan is a dataclass whose required `fixed_tiles` field the
        # original positional call omitted (it would raise TypeError).
        # Add needs no accumulator, so pass an empty tuple.
        return Scan(
            tile.Add,
            tensors=(self.input_ops[0].out, self.input_ops[1].out, self.out),
            fixed_tiles=(),
        )
class SoftMax(TensorOp):
    # Softmax of `t` into a newly allocated tensor of the same shape/dtype.
    def __init__(self, t: TensorOp):
        new_t = Allocate(t.out.shape, t.out.dtype)
        super().__init__(out=new_t.out, input_ops=(t, new_t))
    def generate(self) -> tile.TileOp:
        # Accumulator spans all but the last axis, i.e. one running value
        # per reduced row.
        # NOTE(review): dtype=np.int32 looks wrong for an exp-sum
        # accumulator (softmax needs floating point) -- confirm intent.
        accumulator_tile = tile.Tile(self.out.shape[:-1], dtype=np.int32)
        # NOTE(review): `self.ins` is not defined anywhere in this file;
        # the other generate() methods read self.input_ops -- verify.
        (in_a,) = self.ins
        return tile.WithTemporaryTile(
            accumulator_tile,
            # Pass 1: write exp(x) into the output while accumulating the
            # per-row sum into the temporary tile.
            Scan(
                tile.ExpSum, tensors=(in_a.out, self.out), fixed_tiles=accumulator_tile
            ),
            # Pass 2: divide each output element by its row's sum, in place.
            Scan(tile.DivideInPlace, tensors=(self.out,), fixed_tiles=accumulator_tile),
        )
class MatMul(TensorOp):
    """Matrix multiplication of two tensors into a freshly allocated output."""

    def __init__(self, in_a: TensorOp, in_b: TensorOp):
        # Create a new shape vector whose value is
        #   out_shape = in_a.shape[:-1] + (in_b.shape[-1],)
        # `rank` replaces the original local named `len`, which shadowed
        # the builtin.
        rank = in_a.out.shape.out.length
        out_shape = vector.Copy(
            # Inner copy: leading rank-1 dims come from in_a's shape.
            dst=vector.Copy(
                dst=vector.Allocate(rank, in_a.out.shape.out.dtype),
                src=in_a.out.shape,
                size=rank - 1,
            ),
            # Outer copy: the final dim comes from in_b's last dim.
            dst_offset=rank - 1,
            src=in_b.out.shape,
            src_offset=rank - 1,
            size=1,
        )
        new_t = Allocate(out_shape, in_a.out.dtype)
        super().__init__(out=new_t.out, input_ops=(in_a, in_b, new_t))

    def generate(self) -> tile.TileOp:
        """Lower to a tile-level matmul scan."""
        # ScanForMatMul is a dataclass with fields
        # (op, tile_in_a, tile_in_b, tile_out); the original packed the
        # three tensors into a single tuple, binding it to tile_in_a and
        # raising TypeError for the missing fields. Pass them unpacked.
        return ScanForMatMul(
            tile.MatMul, self.input_ops[0].out, self.input_ops[1].out, self.out
        )
@dc.dataclass
class Scan(tile.TileOp):
    # Tile-level loop applying `op` across corresponding tiles of `tensors`,
    # threading `fixed_tiles` (e.g. an accumulator) through every step.
    op: tile.TileOp
    # Variadic in practice: call sites in this file pass 1-3 tensors, so the
    # annotation is widened from the original 1-tuple Tuple[Tensor].
    tensors: Tuple[Tensor, ...]
    # NOTE(review): annotated as a tuple, but SoftMax.generate passes a bare
    # tile.Tile here -- confirm the intended type.
    fixed_tiles: Tuple[tile.Tile]
    # Defaults to None, so effectively Optional[Tuple[int, ...]]; presumably
    # filled in later during lowering -- verify where it is set.
    _tile_shape: Tuple[int] = None
@dc.dataclass
class ScanForMatMul(tile.TileOp):
    # Tile-level loop specialized for matrix multiplication: holds the tile
    # op plus the two input tensors and the output tensor it scans over.
    op: tile.TileOp
    tile_in_a: Tensor
    tile_in_b: Tensor
    tile_out: Tensor