Skip to content

Commit 9950185

Browse files
committed
Implement StreamBuffer modules for HBM buffering
- wip: implemented stream cache writer, working on reader - implement stream reader - add tests for stream cache - move from stream cache to buffer, change semantics - use a reset state to prevent mem_config_i swallowing in StreamWriter - clarify unit test names - remove old needless edits - tie off other card signals if any
1 parent 869c54b commit 9950185

18 files changed

Lines changed: 798 additions & 21 deletions

hardware/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ find_package(CoyoteHW REQUIRED)
99
set(N_REGIONS 1)
1010
set(EN_STRM 1)
1111
set(N_STRM_AXI 4)
12+
set(EN_MEM 1)
13+
set(N_CARD_AXI 1)
1214
set(FDEV_NAME u55c)
1315

1416
validation_checks_hw()
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
`timescale 1ns / 1ps
2+
3+
import libstf::*;
4+
5+
`include "libstf_macros.svh"
6+
7+
/*
8+
* This interface links the StreamBufferWriter and StreamBufferReader, acting as
9+
* a stream where tokens are shared. Each token represents the amount of
10+
* bytes written by the latest card write. This is used to pause reads until
11+
* data for a region has been fully written, so that partial data is never
12+
* read.
13+
*/
14+
interface stream_buffer_link_i (
    input logic clk,
    input logic rst_n
);
    // Token payload. It is qualified by the valid/ready handshake below.
    vaddress_t vaddr;
    vaddress_t size;
    logic      last;

    // Handshake signals: the master side drives valid, the slave side drives ready.
    logic valid, ready;

    // Tie-off for an unused slave side: always ready.
    task tie_off_s();
        ready = 1'b1;
    endtask

    // Tie-off for an unused master side: never valid.
    task tie_off_m();
        valid = 1'b0;
    endtask

    // Slave (consumer) view: receives the token and answers with ready.
    modport s (
        import tie_off_s,
        input  vaddr, size, last, valid,
        output ready
    );

    // Master (producer) view: drives the token and observes ready.
    modport m (
        import tie_off_m,
        input  ready,
        output vaddr, size, last, valid
    );

// Simulation-only protocol checks. The macros come from libstf_macros.svh.
`ifndef SYNTHESIS
    `STF_ASSERT_SIGNAL_STABLE(vaddr);
    `STF_ASSERT_SIGNAL_STABLE(size);
    `STF_ASSERT_SIGNAL_STABLE(last);

    `STF_ASSERT_NOT_UNDEFINED(valid);
    `STF_ASSERT_NOT_UNDEFINED(ready);
`endif
endinterface
53+
54+
interface mem_read_config_i (
    input logic clk,
    input logic rst_n
);
    // Read request payload. It is qualified by the valid/ready handshake below.
    vaddress_t vaddr;
    data32_t   size;

    // Handshake signals: the master side drives valid, the slave side drives ready.
    logic valid, ready;

    // Tie-off for an unused slave side: always ready.
    task tie_off_s();
        ready = 1'b1;
    endtask

    // Tie-off for an unused master side: never valid.
    task tie_off_m();
        valid = 1'b0;
    endtask

    // Slave (consumer) view: receives the request and answers with ready.
    modport s (
        import tie_off_s,
        input  vaddr, size, valid,
        output ready
    );

    // Master (producer) view: drives the request and observes ready.
    modport m (
        import tie_off_m,
        input  ready,
        output vaddr, size, valid
    );

// Simulation-only protocol checks. The macros come from libstf_macros.svh.
`ifndef SYNTHESIS
    `STF_ASSERT_STABLE(vaddr, valid, ready);
    `STF_ASSERT_STABLE(size, valid, ready);
    `STF_ASSERT_NOT_UNDEFINED(valid);
    `STF_ASSERT_NOT_UNDEFINED(ready);
`endif
endinterface
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
`timescale 1ns / 1ps
2+
3+
import lynxTypes::*;
4+
import libstf::*;
5+
6+
`include "axi_macros.svh"
7+
`include "lynx_macros.svh"
8+
`include "libstf_macros.svh"
9+
10+
/*
11+
* NOTE: the input_data should be wired to the AXI stream where incoming data
12+
* will be streamed after a request has been sent via sq_rd.
13+
* For example, in the case of card memory, it should be
14+
* axis_card_recv[AXI_STRM_ID].
15+
* NOTE: TRANSFER_SIZE (forwarded as TRANSFER_LENGTH_BYTES) must be the same as configured in the
16+
* writer.
17+
*/
18+
module StreamBufferReader #(
    parameter AXI_STRM_ID = 0,
    parameter TRANSFER_SIZE = TRANSFER_SIZE_BYTES
) (
    input logic clk,
    input logic rst_n,

    metaIntf.m sq_rd,
    metaIntf.s cq_rd,

    stream_buffer_link_i.s link,

    AXI4SR.s in,
    AXI4S.m out
);

`RESET_RESYNC // Reset pipelining

// Internal interfaces: the read request handed to the StreamReader and the
// data stream it produces.
mem_read_config_i mem_config (.clk(clk), .rst_n(reset_synced));
AXI4S inner_out (.aclk(clk), .aresetn(reset_synced));

// Each link token is forwarded unchanged as one read request. The link
// handshake therefore completes exactly when the StreamReader accepts the
// request.
// NOTE(review): link.size is vaddress_t while mem_config.size is data32_t, so
// this assignment resizes implicitly if the widths differ. Confirm this is
// intended.
assign mem_config.valid = link.valid;
assign mem_config.vaddr = link.vaddr;
assign mem_config.size = link.size;
assign link.ready = mem_config.ready;

StreamReader #(
    .STRM(STRM_CARD),
    .AXI_STRM_ID(AXI_STRM_ID),
    .TRANSFER_LENGTH_BYTES(TRANSFER_SIZE)
) inst_stream_reader (
    .clk(clk),
    .rst_n(reset_synced),

    .sq_rd(sq_rd),
    .cq_rd(cq_rd),

    .mem_config(mem_config),

    .input_data(in),
    .output_data(inner_out)
);

// Holds the `last` flag of the most recently accepted link token. It is
// cleared on reset and keeps its value between handshakes.
// NOTE(review): if a new token can be accepted while data of an earlier
// request is still streaming out, this flag would already describe the newer
// token. Verify against the StreamReader's request pipelining.
logic last_token_seen;

always_ff @(posedge clk) begin
    if (!reset_synced) begin
        last_token_seen <= 1'b0;
    end else if (link.valid && link.ready) begin
        last_token_seen <= link.last;
    end
end

// Pass the data through unchanged. tlast is only asserted downstream when the
// inner tlast coincides with a token that was flagged as last.
assign out.tvalid = inner_out.tvalid;
assign out.tdata = inner_out.tdata;
assign out.tkeep = inner_out.tkeep;
assign out.tlast = last_token_seen && inner_out.tlast;
assign inner_out.tready = out.tready;

endmodule
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
`timescale 1ns / 1ps
2+
3+
import libstf::*;
4+
import lynxTypes::*;
5+
6+
`include "axi_macros.svh"
7+
`include "libstf_macros.svh"
8+
9+
/*
10+
* NOTE: out must be wired to axis_card_send[AXI_STRM_ID].
11+
*/
12+
module StreamBufferWriter #(
    parameter AXI_STRM_ID = 0,
    parameter TRANSFER_SIZE = TRANSFER_SIZE_BYTES,
    // NOTE: this is the number of transfers that will be allocated at a time
    // when more memory is provided to the underlying StreamWriter.
    parameter TRANFERS_PER_ALLOCATION = MAXIMUM_HOST_ALLOCATION_SIZE_BYTES / TRANSFER_SIZE
) (
    input logic clk,
    input logic rst_n,

    metaIntf.m sq_wr,
    metaIntf.s cq_wr,

    AXI4S.s in,

    stream_buffer_link_i.m link,
    AXI4SR.m out
);

`RESET_RESYNC // Reset pipelining

// Size in bytes of one allocation handed to the StreamWriter.
localparam int ALLOCATION_BYTES = TRANFERS_PER_ALLOCATION * TRANSFER_SIZE;

// notify: write notifications from the StreamWriter. Each accepted
// notification advances next_vaddr and is forwarded as one token on link.
// mem_config: buffer descriptors offered to the StreamWriter. A new
// descriptor is offered by the state machine below whenever one is needed.
stream_writer_notify_i notify (.clk(clk), .rst_n(reset_synced));
mem_config_i mem_config (.clk(clk), .rst_n(reset_synced));

// NOTE(review): next_buffer_vaddr is written below but never read anywhere in
// this module. It looks like a leftover and could be removed.
vaddress_t next_vaddr, next_buffer_vaddr, last_allocation_end_vaddr;

// The buffer offered to the StreamWriter always starts at the current write
// position.
// NOTE(review): size is given in transfers (TRANFERS_PER_ALLOCATION) while
// the end address below is computed in bytes (ALLOCATION_BYTES). Confirm the
// unit that buffer_t.size expects.
buffer_t next_buffer;
assign next_buffer.vaddr = next_vaddr;
assign next_buffer.size = TRANFERS_PER_ALLOCATION;
assign mem_config.flush_buffers = 1'b0;
assign mem_config.buffer_data = next_buffer;
// mem_config.buffer_valid is driven exclusively by the always_comb block
// below. It must not also be driven by a continuous assignment, because a
// signal written in always_comb may not have any other driver.

// This state machine ensures that the notification of a completed write is
// received by the consumer on the other end of the link. It also ensures that
// when the current memory allocation is exhausted, a new memory allocation is
// provided on the mem_config interface and acknowledged.
typedef enum logic {
    ST_NOT_FULL,
    ST_NEEDS_ALLOCATION
} state_t;
state_t state;

// Write position after the notification currently presented on notify.
vaddress_t n_next_vaddr;
assign n_next_vaddr = next_vaddr + notify.size;

always_ff @(posedge clk) begin
    if (reset_synced == 1'b0) begin
        next_vaddr <= '0;
        next_buffer_vaddr <= '0;
        last_allocation_end_vaddr <= '0;

        // Start by offering a buffer, so the first descriptor is only
        // presented once reset has been released.
        state <= ST_NEEDS_ALLOCATION;
    end else begin
        case (state)
            ST_NOT_FULL: begin
                if (notify.ready && notify.valid) begin
                    next_vaddr <= n_next_vaddr;

                    // When we receive a last, the writer is going to assume
                    // that we want a new allocation for the next stream. This
                    // is the case when sending data to the host, but not when
                    // sending data to the card (we don't want to waste memory
                    // for a new allocation, leaving the current one half-used).
                    if (notify.last || n_next_vaddr >= last_allocation_end_vaddr) begin
                        state <= ST_NEEDS_ALLOCATION;
                    end
                end
            end

            ST_NEEDS_ALLOCATION: begin
                if (mem_config.buffer_ready) begin
                    next_buffer_vaddr <= next_buffer_vaddr + ALLOCATION_BYTES;
                    last_allocation_end_vaddr <= next_vaddr + ALLOCATION_BYTES;

                    state <= ST_NOT_FULL;
                end
            end
        endcase
    end
end

always_comb begin
    // Defaults: accept no notification and offer no buffer. These cover reset
    // and any undefined state value, so no latch can be inferred.
    notify.ready = 1'b0;
    mem_config.buffer_valid = 1'b0;

    if (reset_synced != 1'b0) begin
        case (state)
            ST_NOT_FULL: begin
                // A notification is accepted only when the link consumer
                // accepts the corresponding token.
                notify.ready = link.ready;
            end

            ST_NEEDS_ALLOCATION: begin
                // Offer the next buffer until the StreamWriter takes it.
                mem_config.buffer_valid = 1'b1;
            end

            default: begin
            end
        endcase
    end

    // Token for the reader: where the write started and what the writer
    // reported for it.
    link.vaddr = next_vaddr;
    link.size = notify.size;
    link.last = notify.last;
    link.valid = state == ST_NOT_FULL && notify.valid;
end

StreamWriter #(
    .STRM(STRM_CARD),
    .AXI_STRM_ID(AXI_STRM_ID),
    .TRANSFER_LENGTH_BYTES(TRANSFER_SIZE)
) inst_stream_writer (
    .clk(clk),
    .rst_n(reset_synced),

    .sq_wr(sq_wr),
    .cq_wr(cq_wr),

    .notify(notify),
    .mem_config(mem_config),

    .input_data(in),
    .output_data(out)
);

endmodule

0 commit comments

Comments
 (0)