diff --git a/conv_1d_bc.cpp b/conv_1d_bc.cpp deleted file mode 100644 index 8efbcd282..000000000 --- a/conv_1d_bc.cpp +++ /dev/null @@ -1,264 +0,0 @@ -#ifndef __VIVADO_SYNTH__ -#include -using namespace std; - - // Debug utility - ofstream* global_debug_handle; - -#endif //__VIVADO_SYNTH__ -#include "accumulate_3.h" - -#include "hw_classes.h" - -struct M_get_input_0_to_M_compute_output_3_cache { - // RAM Box: {[0, 9]} - // Capacity: 3 - // # of read delays: 3 - fifo , 3> f; - inline hw_uint<32> peek(const int offset) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.peek(2 - offset); - } - - - - inline void push(const hw_uint<32> value) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.push(value); - } - -}; - -struct M_get_input_0_merged_banks_2_cache { - // RAM Box: {[0, 9]} - // Capacity: 2 - // # of read delays: 2 - hw_uint<32> f0; - hw_uint<32> f2; - - - inline hw_uint<32> peek_0() { - return f0; - } - - inline hw_uint<32> peek_1() { - return f2; - } - - - - inline void push(const hw_uint<32> value) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // cap: 1 reading from capacity: 1 - f2 = f0; - // cap: 1 - f0 = value; - } - -}; - -struct M_cache { - M_get_input_0_to_M_compute_output_3_cache M_get_input_0_to_M_compute_output_3; - M_get_input_0_merged_banks_2_cache M_get_input_0_merged_banks_2; -}; - - - -inline void M_get_input_0_write(hw_uint<32> & M_get_input_0, M_cache& M, int root, int p) { - M.M_get_input_0_to_M_compute_output_3.push(M_get_input_0); - M.M_get_input_0_merged_banks_2.push(M_get_input_0); -} - -inline hw_uint<32> M_compute_output_3_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_3 read pattern: { compute_output[root = 0, c] -> M[c] : 0 <= c <= 8; compute_output[root = 0, c = 9] -> M[9] } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { compute_output[root, c] -> 2 : root = 0 and 0 <= c <= 7; compute_output[root, c] -> 1 : root = 0 and c = 8 } - auto value_M_get_input_0 = M.M_get_input_0_to_M_compute_output_3.peek(/* one reader or all rams */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -inline hw_uint<32> M_compute_output_4_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_4 read pattern: { compute_output[root = 0, c] -> M[9] : 8 <= c <= 9; compute_output[root = 0, c] -> M[1 + c] : 0 <= c <= 7 } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { compute_output[root, c] -> 1 : root = 0 and 0 <= c <= 7 } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_2.peek(/* Needs general delay string */ (7 - c >= 0) ? (1) : 0); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -inline hw_uint<32> M_compute_output_5_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_5 read pattern: { compute_output[root = 0, c] -> M[9] : 7 <= c <= 9; compute_output[root = 0, c] -> M[2 + c] : 0 <= c <= 6 } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_2.peek_0(); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -// # of bundles = 3 -// M_get_input_0 -// M_get_input_0 -inline void M_M_get_input_0_bundle_write(hw_uint<32>& M_get_input_0, M_cache& M, int root, int p) { - hw_uint<32> M_get_input_0_res = M_get_input_0.extract<0, 31>(); - M_get_input_0_write(M_get_input_0_res, M, root, p); -} - -// compute_output_read -// M_compute_output_3 -// M_compute_output_4 -// M_compute_output_5 -inline hw_uint<96> M_compute_output_read_bundle_read(M_cache& M, int root, int c) { - // # of ports in bundle: 3 - // M_compute_output_3 - // M_compute_output_4 - // M_compute_output_5 - - hw_uint<96> result; - hw_uint<32> M_compute_output_3_res = M_compute_output_3_select(M, root, c); - set_at<0, 96>(result, M_compute_output_3_res); - hw_uint<32> M_compute_output_4_res = M_compute_output_4_select(M, root, c); - set_at<32, 96>(result, M_compute_output_4_res); - hw_uint<32> M_compute_output_5_res = M_compute_output_5_select(M, root, c); - set_at<64, 96>(result, M_compute_output_5_res); - return result; -} - -// get_input_write -// M_get_input_0 -inline void M_get_input_write_bundle_write(hw_uint<32>& get_input_write, M_cache& M, int root, int p) { - hw_uint<32> M_get_input_0_res = get_input_write.extract<0, 31>(); - M_get_input_0_write(M_get_input_0_res, M, root, p); -} - - - -// Operation logic -inline void get_input(HWStream >& /* buffer_args num ports = 1 */in, M_cache& M, int root, int p) { - // Consume: in - auto in_p_value = in.read(); - // Produce: M - M_get_input_write_bundle_write(in_p_value, M, root, p); - -#ifndef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - -} - -inline void compute_output(HWStream >& /* buffer_args num ports = 1 */out, int root, int c) { - auto compute_result = accumulate_3(); - // Produce: out - out.write(compute_result); - -#ifndef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - -} - -// Driver function -void conv_1d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out, int num_epochs) { - -#ifndef __VIVADO_SYNTH__ - ofstream debug_file("conv_1d_bc_debug.csv"); - global_debug_handle = &debug_file; -#endif //__VIVADO_SYNTH__ - M_cache M; -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ -#ifdef __VIVADO_SYNTH__ -#pragma HLS inline recursive -#endif // __VIVADO_SYNTH__ - - for (int epoch = 0; epoch < num_epochs; epoch++) { - for (int c0 = 0; c0 <= 11; c0 += 1) { - if (c0 <= 9) - get_input(in, M, 0, c0); - if (c0 >= 2) - compute_output(M, out, 0, c0 - 2); - } - - } - -#ifndef __VIVADO_SYNTH__ - debug_file.close(); -#endif //__VIVADO_SYNTH__ -} - -void conv_1d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out) { - - conv_1d_bc(in, out, 1); -} -#ifdef __VIVADO_SYNTH__ -const int get_input_read_num_transfers = 0; -const int compute_output_write_num_transfers = 0; - - -extern "C" { - -static void read_get_input_read(hw_uint<32>* input, HWStream >& v, const int size) { - hw_uint<32> burst_reg; - int num_transfers = get_input_read_num_transfers*size; - for (int i = 0; i < num_transfers; i++) { - #pragma HLS pipeline II=1 - burst_reg = input[i]; - v.write(burst_reg); - } -} - -static void write_compute_output_write(hw_uint<32>* output, HWStream >& v, const int size) { - hw_uint<32> burst_reg; - int num_transfers = compute_output_write_num_transfers*size; - for (int i = 0; i < num_transfers; i++) { - #pragma HLS pipeline II=1 - burst_reg = v.read(); - output[i] = burst_reg; - } -} - -void conv_1d_bc_accel(hw_uint<32>* get_input_read, hw_uint<32>* compute_output_write, const int size) { -#pragma HLS dataflow -#pragma HLS INTERFACE m_axi port = get_input_read offset = slave depth = 65536 bundle = gmem0 -#pragma HLS INTERFACE m_axi port = compute_output_write offset = slave depth = 65536 bundle = gmem1 - -#pragma HLS INTERFACE s_axilite port = get_input_read bundle = control -#pragma HLS INTERFACE s_axilite port = compute_output_write bundle = control -#pragma HLS INTERFACE s_axilite port = size bundle = control -#pragma HLS INTERFACE s_axilite port = return bundle = control - - static HWStream > get_input_read_channel; - static HWStream > compute_output_write_channel; - - read_get_input_read(get_input_read, get_input_read_channel, size); - - conv_1d_bc(get_input_read_channel, compute_output_write_channel, size); - - write_compute_output_write(compute_output_write, compute_output_write_channel, size); -} - -} -#endif //__VIVADO_SYNTH__ - diff --git a/conv_2d_bc.cpp b/conv_2d_bc.cpp new file mode 100644 index 000000000..1ccc864c8 --- /dev/null +++ b/conv_2d_bc.cpp @@ -0,0 +1,790 @@ +#ifndef __VIVADO_SYNTH__ +#include +using namespace std; + + // Debug utility + ofstream* global_debug_handle; + +#endif //__VIVADO_SYNTH__ +#include "conv_3x3.h" + +#include "hw_classes.h" + +struct I_write_0_merged_banks_19_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 66 + // # of read delays: 4 + hw_uint<32> f0; + hw_uint<32> f2; + fifo , 62> f3; + hw_uint<32> f4; + hw_uint<32> f6; + + + inline hw_uint<32> peek_0() { + return f0; + } + + inline hw_uint<32> peek_1() { + return f2; + } + + inline hw_uint<32> peek_63() { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f3.back(); + } + + inline hw_uint<32> peek_64() { + return f4; + } + + inline hw_uint<32> peek_65() { + return f6; + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 1 + f6 = f4; +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 62 + f4 = f3.back(); +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 62 reading from capacity: 1 + f3.push(f2); +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 1 + f2 = f0; + // cap: 1 + f0 = value; + } + +}; + +struct I_write_0_to_I_read_0_10_1_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_11_5_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_8_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 131 + // # of read delays: 67 + fifo , 131> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(130 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_9_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_10_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_4_12_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_4_13_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 67 + // # of read delays: 3 + fifo , 67> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(66 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_5_16_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_5_17_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 3 + // # of read delays: 3 + fifo , 3> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(2 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_20_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_21_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 130 + // # of read delays: 66 + fifo , 130> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(129 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_23_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_7_25_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_8_29_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_32_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_33_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 65 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_35_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_cache { + I_write_0_merged_banks_19_cache I_write_0_merged_banks_19; + I_write_0_to_I_read_0_10_1_cache I_write_0_to_I_read_0_10_1; + I_write_0_to_I_read_0_11_5_cache I_write_0_to_I_read_0_11_5; + I_write_0_to_I_read_0_3_8_cache I_write_0_to_I_read_0_3_8; + I_write_0_to_I_read_0_3_9_cache I_write_0_to_I_read_0_3_9; + I_write_0_to_I_read_0_3_10_cache I_write_0_to_I_read_0_3_10; + I_write_0_to_I_read_0_4_12_cache I_write_0_to_I_read_0_4_12; + I_write_0_to_I_read_0_4_13_cache I_write_0_to_I_read_0_4_13; + I_write_0_to_I_read_0_5_16_cache I_write_0_to_I_read_0_5_16; + I_write_0_to_I_read_0_5_17_cache I_write_0_to_I_read_0_5_17; + I_write_0_to_I_read_0_6_20_cache I_write_0_to_I_read_0_6_20; + I_write_0_to_I_read_0_6_21_cache I_write_0_to_I_read_0_6_21; + I_write_0_to_I_read_0_6_23_cache I_write_0_to_I_read_0_6_23; + I_write_0_to_I_read_0_7_25_cache I_write_0_to_I_read_0_7_25; + I_write_0_to_I_read_0_8_29_cache I_write_0_to_I_read_0_8_29; + I_write_0_to_I_read_0_9_32_cache I_write_0_to_I_read_0_9_32; + I_write_0_to_I_read_0_9_33_cache I_write_0_to_I_read_0_9_33; + I_write_0_to_I_read_0_9_35_cache I_write_0_to_I_read_0_9_35; +}; + + + +inline void I_write_0_write(hw_uint<32> & I_write_0, I_cache& I, int root, int pr, int pc) { + I.I_write_0_merged_banks_19.push(I_write_0); + I.I_write_0_to_I_read_0_10_1.push(I_write_0); + I.I_write_0_to_I_read_0_11_5.push(I_write_0); + I.I_write_0_to_I_read_0_3_8.push(I_write_0); + I.I_write_0_to_I_read_0_3_9.push(I_write_0); + I.I_write_0_to_I_read_0_3_10.push(I_write_0); + I.I_write_0_to_I_read_0_4_12.push(I_write_0); + I.I_write_0_to_I_read_0_4_13.push(I_write_0); + I.I_write_0_to_I_read_0_5_16.push(I_write_0); + I.I_write_0_to_I_read_0_5_17.push(I_write_0); + I.I_write_0_to_I_read_0_6_20.push(I_write_0); + I.I_write_0_to_I_read_0_6_21.push(I_write_0); + I.I_write_0_to_I_read_0_6_23.push(I_write_0); + I.I_write_0_to_I_read_0_7_25.push(I_write_0); + I.I_write_0_to_I_read_0_8_29.push(I_write_0); + I.I_write_0_to_I_read_0_9_32.push(I_write_0); + I.I_write_0_to_I_read_0_9_33.push(I_write_0); + I.I_write_0_to_I_read_0_9_35.push(I_write_0); +} + +inline hw_uint<32> I_read_0_10_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_10 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 62 <= lr <= 63 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 60; read_0[root = 0, lr, lc] -> I[63, 1 + lr] : 0 <= lr <= 61 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 60; read_0[root, lr, lc] -> 64 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lc = 61 and 0 <= lr <= 61; read_0[root, lr, lc] -> (2 + lc) : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ ((-63 + lc == 0 && 61 - lr >= 0) || (61 - lc >= 0 && 61 - lr >= 0)) ? (64) : (-62 + lc == 0 && 61 - lr >= 0) ? (64) : (-62 + lr >= 0 && 60 - lc >= 0) ? ((61 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_11_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_11 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 61 <= lr <= 63 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 60; read_0[root = 0, lr, lc] -> I[63, 2 + lr] : 0 <= lr <= 60 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (-62 + lr >= 0 && 60 - lc >= 0) ? ((61 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_3_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_3 read pattern: { read_0[root = 0, lr, lc] -> I[lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 62; read_0[root = 0, lr = 63, lc] -> I[lc, 63] : 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, lr] : 0 <= lr <= 62; read_0[root = 0, lr = 63, lc = 63] -> I[63, 63] } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and lr = 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 128 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and lc = 63; read_0[root, lr, lc] -> 130 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 129 : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> (127 - lc) : root = 0 and lr = 62 and 0 <= lc <= 62 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_3_8.peek(/* one reader or all rams */ (-63 + lc == 0 && -62 + lr == 0) ? (64) : (-63 + lc == 0 && 61 - lr >= 0) ? (128) : (-62 + lc == 0 && 61 - lr >= 0) ? (129) : (61 - lc >= 0 && 61 - lr >= 0) ? (130) : (-63 + lr == 0 && 62 - lc >= 0) ? ((63 - lc)) : (-62 + lr == 0 && 62 - lc >= 0) ? ((127 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_4_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_4 read pattern: { read_0[root = 0, lr, lc] -> I[lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root = 0, lr, lc] -> I[lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, 63] : 62 <= lr <= 63; read_0[root = 0, lr, lc = 63] -> I[63, 1 + lr] : 0 <= lr <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 64 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 66 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 65 : root = 0 and lc = 62 and 0 <= lr <= 61 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_4_12.peek(/* one reader or all rams */ (-63 + lc == 0 && 61 - lr >= 0) ? (64) : (-62 + lc == 0 && 61 - lr >= 0) ? (65) : (61 - lc >= 0 && 61 - lr >= 0) ? (66) : (-62 + lr >= 0 && 62 - lc >= 0) ? ((63 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_5_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_5 read pattern: { read_0[root = 0, lr, lc] -> I[lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 62; read_0[root = 0, lr, lc] -> I[lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, 63] : 61 <= lr <= 63; read_0[root = 0, lr, lc = 63] -> I[63, 2 + lr] : 0 <= lr <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 2 : root = 0 and lr = 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lr = 61 and lc = 62; read_0[root, lr, lc] -> 2 : root = 0 and 0 <= lr <= 60 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lc = 62 and 0 <= lr <= 60 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_5_16.peek(/* one reader or all rams */ (-62 + lc == 0 && 61 - lr >= 0) ? (1) : (61 - lc >= 0 && 61 - lr >= 0) ? (2) : (-62 + lr >= 0 && 62 - lc >= 0) ? ((63 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_6_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_6 read pattern: { read_0[root = 0, lr, lc] -> I[63, lr] : 0 <= lr <= 62 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 61; read_0[root = 0, lr = 63, lc] -> I[63, 63] : 62 <= lc <= 63; read_0[root = 0, lr = 63, lc] -> I[1 + lc, 63] : 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and lr = 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 128 : root = 0 and 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and 62 <= lc <= 63; read_0[root, lr, lc] -> 129 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> (126 - lc) : root = 0 and lr = 62 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_6_20.peek(/* one reader or all rams */ (-62 + lr == 0 && -62 + lc >= 0) ? (64) : (-62 + lc >= 0 && 61 - lr >= 0) ? (128) : (61 - lc >= 0 && 61 - lr >= 0) ? (129) : (-63 + lr == 0 && 61 - lc >= 0) ? ((62 - lc)) : (-62 + lr == 0 && 61 - lc >= 0) ? ((126 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_7_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_7 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 62 <= lr <= 63 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root = 0, lr, lc] -> I[63, 1 + lr] : 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 64 : root = 0 and 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root, lr, lc] -> 65 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (-62 + lc >= 0 && 61 - lr >= 0) ? (64) : (61 - lc >= 0 && 61 - lr >= 0) ? (65) : (-62 + lr >= 0 && 61 - lc >= 0) ? ((62 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_8_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_8 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 61 <= lr <= 63 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 61; read_0[root = 0, lr, lc] -> I[63, 2 + lr] : 0 <= lr <= 60 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lr = 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and 0 <= lr <= 60 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (61 - lc >= 0 && 61 - lr >= 0) ? (1) : (-62 + lr >= 0 && 61 - lc >= 0) ? ((62 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_9_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_9 read pattern: { read_0[root = 0, lr, lc] -> I[63, lr] : 0 <= lr <= 62 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 60; read_0[root = 0, lr = 63, lc] -> I[63, 63] : 61 <= lc <= 63; read_0[root = 0, lr = 63, lc] -> I[2 + lc, 63] : 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and lr = 63 and 0 <= lc <= 60; read_0[root, lr, lc] -> 128 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 128 : root = 0 and lc = 61 and 0 <= lr <= 61; read_0[root, lr, lc] -> (66 + lc) : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and ((61 <= lc <= 62) or lc = 63); read_0[root, lr, lc] -> 128 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 60; read_0[root, lr, lc] -> (125 - lc) : root = 0 and lr = 62 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_9_32.peek(/* one reader or all rams */ (-62 + lr == 0 && -61 + lc >= 0) ? (64) : ((-63 + lc == 0 && 61 - lr >= 0) || (61 - lc >= 0 && 61 - lr >= 0)) ? (128) : (-63 + lr == 0 && 60 - lc >= 0) ? ((61 - lc)) : (-62 + lc == 0 && 61 - lr >= 0) ? (128) : (-62 + lr == 0 && 60 - lc >= 0) ? ((125 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +// # of bundles = 3 +// I_write_0 +// I_write_0 +inline void I_I_write_0_bundle_write(hw_uint<32>& I_write_0, I_cache& I, int root, int pr, int pc) { + hw_uint<32> I_write_0_res = I_write_0.extract<0, 31>(); + I_write_0_write(I_write_0_res, I, root, pr, pc); +} + +// read_0_read +// I_read_0_3 +// I_read_0_4 +// I_read_0_5 +// I_read_0_6 +// I_read_0_7 +// I_read_0_8 +// I_read_0_9 +// I_read_0_10 +// I_read_0_11 +inline hw_uint<288> I_read_0_read_bundle_read(I_cache& I, int root, int lr, int lc) { + // # of ports in bundle: 9 + // I_read_0_3 + // I_read_0_4 + // I_read_0_5 + // I_read_0_6 + // I_read_0_7 + // I_read_0_8 + // I_read_0_9 + // I_read_0_10 + // I_read_0_11 + + hw_uint<288> result; + hw_uint<32> I_read_0_3_res = I_read_0_3_select(I, root, lr, lc); + set_at<0, 288>(result, I_read_0_3_res); + hw_uint<32> I_read_0_4_res = I_read_0_4_select(I, root, lr, lc); + set_at<32, 288>(result, I_read_0_4_res); + hw_uint<32> I_read_0_5_res = I_read_0_5_select(I, root, lr, lc); + set_at<64, 288>(result, I_read_0_5_res); + hw_uint<32> I_read_0_6_res = I_read_0_6_select(I, root, lr, lc); + set_at<96, 288>(result, I_read_0_6_res); + hw_uint<32> I_read_0_7_res = I_read_0_7_select(I, root, lr, lc); + set_at<128, 288>(result, I_read_0_7_res); + hw_uint<32> I_read_0_8_res = I_read_0_8_select(I, root, lr, lc); + set_at<160, 288>(result, I_read_0_8_res); + hw_uint<32> I_read_0_9_res = I_read_0_9_select(I, root, lr, lc); + set_at<192, 288>(result, I_read_0_9_res); + hw_uint<32> I_read_0_10_res = I_read_0_10_select(I, root, lr, lc); + set_at<224, 288>(result, I_read_0_10_res); + hw_uint<32> I_read_0_11_res = I_read_0_11_select(I, root, lr, lc); + set_at<256, 288>(result, I_read_0_11_res); + return result; +} + +// write_write +// I_write_0 +inline void I_write_write_bundle_write(hw_uint<32>& write_write, I_cache& I, int root, int pr, int pc) { + hw_uint<32> I_write_0_res = write_write.extract<0, 31>(); + I_write_0_write(I_write_0_res, I, root, pr, pc); +} + + + +// Operation logic +inline void write(HWStream >& /* buffer_args num ports = 1 */in, I_cache& I, int root, int pr, int pc) { + // Consume: in + auto in_pc_c__pr_value = in.read(); + // Produce: I + I_write_write_bundle_write(in_pc_c__pr_value, I, root, pr, pc); + +#ifndef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + +} + +inline void read_0(HWStream >& /* buffer_args num ports = 1 */out, int root, int lr, int lc) { + auto compute_result = conv_3_3(); + // Produce: out + out.write(compute_result); + +#ifndef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + +} + +// Driver function +void conv_2d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out, int num_epochs) { + +#ifndef __VIVADO_SYNTH__ + ofstream debug_file("conv_2d_bc_debug.csv"); + global_debug_handle = &debug_file; +#endif //__VIVADO_SYNTH__ + I_cache I; +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ +#ifdef __VIVADO_SYNTH__ +#pragma HLS inline recursive +#endif // __VIVADO_SYNTH__ + + for (int epoch = 0; epoch < num_epochs; epoch++) { + for (int c0 = 0; c0 <= 65; c0 += 1) { + if (c0 >= 2) { + if (c0 <= 63) + for (int c1 = 0; c1 <= 1; c1 += 1) + write(in, I, 0, c0, c1); + for (int c1 = 2; c1 <= 65; c1 += 1) { + if (c0 <= 63 && c1 <= 63) + write(in, I, 0, c0, c1); + read_0(I, out, 0, c0 - 2, c1 - 2); + } + } else { + for (int c1 = 0; c1 <= 63; c1 += 1) + write(in, I, 0, c0, c1); + } + } + + } + +#ifndef __VIVADO_SYNTH__ + debug_file.close(); +#endif //__VIVADO_SYNTH__ +} + +void conv_2d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out) { + + conv_2d_bc(in, out, 1); +} +#ifdef __VIVADO_SYNTH__ +const int write_read_num_transfers = 0; +const int read_0_write_num_transfers = 0; + + +extern "C" { + +static void read_write_read(hw_uint<32>* input, HWStream >& v, const int size) { + hw_uint<32> burst_reg; + int num_transfers = write_read_num_transfers*size; + for (int i = 0; i < num_transfers; i++) { + #pragma HLS pipeline II=1 + burst_reg = input[i]; + v.write(burst_reg); + } +} + +static void write_read_0_write(hw_uint<32>* output, HWStream >& v, const int size) { + hw_uint<32> burst_reg; + int num_transfers = read_0_write_num_transfers*size; + for (int i = 0; i < num_transfers; i++) { + #pragma HLS pipeline II=1 + burst_reg = v.read(); + output[i] = burst_reg; + } +} + +void conv_2d_bc_accel(hw_uint<32>* write_read, hw_uint<32>* read_0_write, const int size) { +#pragma HLS dataflow +#pragma HLS INTERFACE m_axi port = write_read offset = slave depth = 65536 bundle = gmem0 +#pragma HLS INTERFACE m_axi port = read_0_write offset = slave depth = 65536 bundle = gmem1 + +#pragma HLS INTERFACE s_axilite port = write_read bundle = control +#pragma HLS INTERFACE s_axilite port = read_0_write bundle = control +#pragma HLS INTERFACE s_axilite port = size bundle = control +#pragma HLS INTERFACE s_axilite port = return bundle = control + + static HWStream > write_read_channel; + static HWStream > read_0_write_channel; + + read_write_read(write_read, write_read_channel, size); + + conv_2d_bc(write_read_channel, read_0_write_channel, size); + + write_read_0_write(read_0_write, read_0_write_channel, size); +} + +} +#endif //__VIVADO_SYNTH__ + diff --git a/ubuffer.cpp b/ubuffer.cpp index a9aa49601..53eb8f3cc 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -227,12 +227,15 @@ void generate_bank(CodegenOptions& options, out << "\t// # of read delays: " << read_delays.size() << endl; read_delays = sort_unique(read_delays); - + // cout << "PEEK num readers " << num_readers << endl; + // cout << "PEEK options.all_rams " << options.all_rams << endl; if (num_readers == 1 || options.all_rams) { int partition_capacity = 1 + maxdelay; out << "\tfifo<" << pt_type_string << ", " << partition_capacity << "> f" << ";" << endl; + // cout << "peek1" << endl; out << "\tinline " + pt_type_string + " peek(const int offset) {" << endl; ignore_inter_deps(out, "f"); + // cout << "peek2" << endl; out << tab(2) << "return f.peek(" << partition_capacity - 1 << " - offset);" << endl; out << tab(1) << "}" << endl << endl; @@ -286,7 +289,7 @@ void generate_bank(CodegenOptions& options, //} //assert(capacities.size() == partitions.size()); - + // cout<<"num partitions "<= 0); + // cout << "peek3" << endl; out << "\tinline " << pt_type_string << " peek_" << to_string(dv) << "() {" << endl; if (capacity > 1) { ignore_inter_deps(out, p.first); @@ -519,11 +523,15 @@ void generate_code_prefix(CodegenOptions& options, UBuffer& buf) { //banking and merge pass + // cout << "before generate bank and merge " << endl; buf.generate_bank_and_merge(options); //string inpt = buf.get_in_port(); out << "#include \"hw_classes.h\"" << endl << endl; + cout << "before get banks " << endl; for (auto b : buf.get_banks()) { + // cout << "BANK NAME " << b.name << endl; + // cout<< "BANK MERGED READERS " << b.num_readers << endl; generate_bank(options, out, b); } @@ -547,9 +555,11 @@ void generate_code_prefix(CodegenOptions& options, concat(args, dimension_var_decls(inpt, buf)); string var_args = comma_list(dimension_var_args(inpt, buf)); + // write func for every input port that gets called in this bundle out << "inline void " << inpt << "_write("; out << comma_list(args) << ") {" << endl; + // copy and broadcast whenever write to port is done //Different ram type, different address for (auto sb : buf.receiver_banks(inpt)) { //if (sb.tp == BANK_TYPE_STACK) { @@ -583,6 +593,8 @@ void generate_code_prefix(CodegenOptions& options, pieces_dom = unn(pieces_dom, to_uset(p.first)); } + // cout<<"DOMAIN "<= 0); + // cout << "peek5" << endl; value_str = bank + ".peek_" + dx + "()"; } else { + // cout << "peek6" << endl; value_str = bank + ".peek" + "( /* is opt const */ " + delay_expr + ")"; } } else if (pieces.size() == 0 && !options.all_rams) { + // cout << "peek7" << endl; value_str = bank + ".peek_0()"; } else if (pieces.size() == 1 && isl_set_is_subset(cpy(out_domain), cpy(pieces[0].first))) { string dx = codegen_c(pieces[0].second); if (!options.all_rams && is_number(dx)) { assert(safe_stoi(dx) >= 0); + // cout << "peek8" << endl; value_str = bank + ".peek_" + dx + "()"; } else { + // cout << "peek9" << endl; value_str = bank + ".peek" + "(/* is one piece but not a number */" + dx + ")"; } } else { + // cout << "peek10" << endl; value_str = bank + ".peek" + "(/* Needs general delay string */ " + delay_expr + ")"; } } - + // cout<<"value_str "< mergeable) { + cout << "merge bank called " << endl; if (!options.conditional_merge){ stack_bank merged; merged.tp = BANK_TYPE_STACK; @@ -1191,6 +1236,7 @@ void generate_code_prefix(CodegenOptions& options, merged.pt_type_string = mergeable.at(0).pt_type_string; merged.num_readers = mergeable.size(); + // cout << "MERGED NUM READERS " << merged.num_readers << endl; merged.maxdelay = -1; for (auto m : mergeable) { //cout << "merge: " << m.name << endl; @@ -1204,8 +1250,9 @@ void generate_code_prefix(CodegenOptions& options, } } merged.read_delays = sort_unique(merged.read_delays); - +cout << "mergeable size " << mergeable.size() << endl; for (auto to_replace : mergeable) { + cout << "replace bank called" << endl; replace_bank(to_replace, merged); } } @@ -1215,9 +1262,6 @@ void generate_code_prefix(CodegenOptions& options, sort(mergeable.begin(), mergeable.end(), [](const bank& l, const bank& r) { return l.maxdelay > r.maxdelay; }); - for (auto merge_bank : mergeable) { - //cout << merge_bank.name << " with delay : " << merge_bank.maxdelay << endl; - } while(mergeable.size()) { //keep pop port to merged bank and replace origin bank @@ -1243,7 +1287,7 @@ void generate_code_prefix(CodegenOptions& options, merged.rddom = unn(merged.rddom, m.rddom); merged.maxdelay = m.maxdelay; merged.read_delays.push_back(m.maxdelay); - cout << m.maxdelay <<", " << merged.maxdelay << endl; + //cout << m.maxdelay <<", " << merged.maxdelay << endl; //get the next data mergeable.pop_back(); @@ -1257,6 +1301,7 @@ void generate_code_prefix(CodegenOptions& options, for (auto to_replace : replace_candidates) { cout << to_replace.name << endl; + cout << "replace bank called 2 " << endl; replace_bank(to_replace, merged); } cout << "Create a new bank !"<< endl; @@ -1287,7 +1332,7 @@ void generate_code_prefix(CodegenOptions& options, for (auto outpt : get_out_ports()) { auto overlap = its(range(access_map.at(inpt)), range(access_map.at(outpt))); - +cout<<"access map in "< receivers = receiver_banks(inpt); - //cout << "Receiver banks for " << inpt << endl; vector mergeable; + cout << "num receivers " << receivers.size() << endl; for (auto bnk : receivers) { - //cout << tab(1) << bnk.name << ", # read offsets: " << bnk.read_delays.size() << endl; - //cout << tab(2) << "# receivers: " << receivers.size() << endl; - if (options.debug_options.expect_all_linebuffers) { - //assert(receivers.size() == 1 || bnk.read_delays.size() == 2); - assert(bnk.read_delays.size() == 2); - } - if (bnk.read_delays.size() == 2) { - assert(bnk.read_delays[0] == 0); - mergeable.push_back(bnk); - } + if (bnk.read_delays.size() != 2) { + auto outpt_vect = bnk.get_out_ports(); + auto outpt = outpt_vect[0]; + + cout << "before splitting banks" << endl; +/* cout << " SCHEDULE : " << str(schedule.at(outpt)) << endl; + for (auto s : get_maps(schedule.at(outpt))) { + for (auto s_ : get_basic_maps(s)) { + cout << tab(1) << str(s_) << endl; + } + }*/ + remove_bank(outpt); + + vector split_banks; + for (auto m : get_maps(access_map.at(outpt))) { + for (auto m_ : get_basic_maps(m)) { + string new_output = outpt + "_" + to_string(counter); + access_map.insert(std::pair(new_output, to_umap(to_map(m_)))); + schedule.insert(std::pair(new_output, schedule.at(outpt))); + //cout << "ACCESS MAP INSERT " << endl; + + stack_bank b_ = compute_bank_info(inpt, new_output); + add_bank_between(inpt, outpt, b_); + if (b_.read_delays.size() == 2) { + mergeable.push_back(b_); + } +// access_map.erase(new_output); +// schedule.erase(new_output); + //for (int i = 0; i < b_.read_delays.size(); i++) { + // cout << "counter: " << counter << " " << " NEW BANK READ DELAYS: " << b_.read_delays[i] << endl; + //} + //cout << "ACCESS MAP: " << str(m_) << endl; + counter++; + } + } + } else { + if (options.debug_options.expect_all_linebuffers) { + //assert(receivers.size() == 1 || bnk.read_delays.size() == 2); + assert(bnk.read_delays.size() == 2); + } + if (bnk.read_delays.size() == 2) { + assert(bnk.read_delays[0] == 0); + mergeable.push_back(bnk); + } + } } - if (mergeable.size() > 0) { - merge_bank(options, inpt, mergeable); - auto banks = get_banks(); - //cout << "finished create bank!" << endl; - //for (bank bk : banks) { - //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; - //cout << tab(1); - //for (int dl: bk.read_delays) { - //cout << dl << "," ; - //} - //cout << endl; - //for (auto dl: bk.delay_map) { - //cout < 0) { +cout << "before merge bank call" << endl; + merge_bank(options, inpt, mergeable); +// auto banks = get_banks(); + } + //cout << "finished create bank!" << endl; + //for (bank bk : banks) { + //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; + //cout << tab(1); + //for (int dl: bk.read_delays) { + //cout << dl << "," ; + //} + //cout << endl; + //for (auto dl: bk.delay_map) { + //cout < schedule; std::map > port_bundles; - map, stack_bank > stack_banks; + map, std::vector > stack_banks; map selectors; //lowering ubuffer to memtile @@ -916,9 +916,12 @@ class UBuffer { } bank get_bank(const std::string& name) const { + cout << "bank name in get_bank " << name << endl; for (auto b : stack_banks) { - if (b.second.name == name) { - return b.second; + for (auto b_ : b.second) { + if (b_.name == name) { + return b_; + } } } cout << "Error: No such bank as: " << name << endl; @@ -928,8 +931,10 @@ class UBuffer { string get_bank_input(const std::string& name) const { for (auto b : stack_banks) { - if (b.second.name == name) { - return b.first.first; + for (auto b_ : b.second) { + if (b_.name == name) { + return b.first.first; + } } } cout << "Error: No such bank as: " << name << endl; @@ -940,8 +945,10 @@ class UBuffer { std::set get_bank_inputs(const std::string& name) const { std::set ret; for (auto b : stack_banks) { - if (b.second.name == name) { - ret.insert(b.first.first); + for (auto b_ : b.second) { + if (b_.name == name) { + ret.insert(b.first.first); + } } } return ret; @@ -950,28 +957,36 @@ class UBuffer { std::set get_bank_outputs(const std::string& name) const { std::set ret; for (auto b : stack_banks) { - if (b.second.name == name) { - ret.insert(b.first.second); + for (auto b_ : b.second) { + if (b_.name == name) { + ret.insert(b.first.second); + } } } return ret; } void replace_bank(stack_bank& target, stack_bank& replacement) { + //cout << "target name " << target.name << " replace name " << replacement.name << endl; for (auto bnk : stack_banks) { - if (bnk.second.name == target.name) { - stack_banks[bnk.first] = replacement; - break; + for (int i = 0; i < bnk.second.size(); i++) { + auto b_ = bnk.second[i]; + if (b_.name == target.name) { + stack_banks[bnk.first][i] = replacement; + + break; + } } } } + // removes all banks at this output port void remove_bank(string pt_name) { - map, bank> replace; + map, std::vector> replace; for (auto bnk : stack_banks) { - if (bnk.first.second != pt_name) { - replace.insert(bnk); - } + if (bnk.first.second != pt_name) { + replace.insert(bnk); + } } stack_banks = replace; } @@ -986,34 +1001,54 @@ class UBuffer { vector get_banks() { vector bnk; std::set done; - for (auto bs : stack_banks) { - if (!elem(bs.second.name, done)) { - bnk.push_back(bs.second); - done.insert(bs.second.name); + for (auto b : stack_banks) { + for (auto b_ : b.second) { + cout << "bank name " << b_.name << endl; + if (!elem(b_.name, done)){ + bnk.push_back(b_); + done.insert(b_.name); + } } } return bnk; } void add_bank_between(const std::string& inpt, const std::string& outpt, stack_bank& bank) { - stack_banks[{inpt, outpt}] = bank; + + if (has_bank_between(inpt, outpt)) { + std::vector b_ = stack_banks[{inpt, outpt}]; + b_.push_back(bank); + stack_banks[{inpt, outpt}] = b_; + //stack_banks[{inpt, outpt}] = bank; + } else { + std::vector b_; + b_.push_back(bank); + stack_banks[{inpt, outpt}] = b_; + } } + // returns true if at least one bank between given input and output ports bool has_bank_between(const std::string& inpt, const std::string& outpt) const { for (auto bs : stack_banks) { if (bs.first.first == inpt && bs.first.second == outpt) { return true; } + } return false; } + // returns name of ONE bank in between given input and output ports, even + // if there are multiple banks -- may want to consider extending this to + // returning ALL banks between given input and output port string bank_between(const std::string& inpt, const std::string& outpt) const { for (auto bs : stack_banks) { if (bs.first.first == inpt && bs.first.second == outpt) { - return bs.second.name; + auto first_bank = bs.second[0]; + cout << "first bank name " << first_bank.name << endl; + return first_bank.name; } } @@ -1022,23 +1057,51 @@ class UBuffer { return ""; } + std::vector banks_between(const std::string& inpt, const std::string& outpt) const { + std::vector bank_names; + for (auto bs : stack_banks) { + if (bs.first.first == inpt && bs.first.second == outpt) { + for (auto b_ : bs.second) { + bank_names.push_back(b_.name); + } + return bank_names; + } + } + + cout << "Error: No bank between: " << inpt << " and " << outpt << endl; + assert(false); + return {""}; + } + + bank get_bank_between(const std::string& inpt, const std::string& outpt) const { string bk_name = bank_between(inpt, outpt); return get_bank(bk_name); } + + std::vector get_banks_between(const std::string& inpt, const std::string& outpt) const { + std::vector bank_names = banks_between(inpt, outpt); + std::vector banks; + for (auto name : bank_names) { + banks.push_back(get_bank(name)); + } + return banks; + } vector receiver_banks(const std::string& inpt) { vector bnks; vector done; for (auto bs : stack_banks) { - if (bs.first.first == inpt) { + for (auto b_ : bs.second) { + if (bs.first.first == inpt) { + + if (!elem(b_.name, done)) { + bnks.push_back(b_); + done.push_back(b_.name); + } - if (!elem(bs.second.name, done)) { - bnks.push_back(bs.second); - done.push_back(bs.second.name); + //assert(bnks.back().read_delays.size() == bs.second.read_delays.size()); } - - //assert(bnks.back().read_delays.size() == bs.second.read_delays.size()); } } return bnks; @@ -1085,7 +1148,8 @@ class UBuffer { for (auto outpt: get_out_ports()) { if (buf.has_bank_between(inpt, outpt)) { stack_banks[make_pair(inpt, outpt)] = - buf.get_bank_between(inpt, outpt); + //{buf.get_bank_between(inpt, outpt)}; + buf.get_banks_between(inpt, outpt); } } }