From 78747a92b0075f4d55c235c9f82a196a5828b0ed Mon Sep 17 00:00:00 2001 From: Sophia Liu Date: Tue, 2 Jun 2020 23:04:20 -0700 Subject: [PATCH 01/33] bank split --- build_set_test.cpp | 31 +++ conv_1d_bc.cpp | 44 ++--- gaussian_pyramid.cpp | 454 +++++++++++++++++++++---------------------- ubuffer.cpp | 58 ++++-- 4 files changed, 313 insertions(+), 274 deletions(-) diff --git a/build_set_test.cpp b/build_set_test.cpp index c9126d8db..36e6a6ce2 100644 --- a/build_set_test.cpp +++ b/build_set_test.cpp @@ -998,6 +998,37 @@ void conv_1d_bc_test() { assert(res == 0); } +prog conv_1d_bc_mirror() { + prog prg; + prg.compute_unit_file = "accumulate_3.h"; + prg.name = "conv_1d_bc"; + prg.add_input("in"); + prg.add_output("out"); + prg.buffer_port_widths["in"] = 32; + prg.buffer_port_widths["out"] = 32; + prg.buffer_port_widths["M"] = 32; + + auto p = prg.add_loop("p", 0, 10); + auto write = p->add_op("get_input"); + write->add_load("in", "p"); + write->add_store("M", "p"); + + auto c = prg.add_loop("c", 0, 10); + auto compute = c->add_op("compute_output"); + compute->add_function("accumulate_3"); + /*compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}}); + compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}}); + compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}});*/ + compute->add_load("M", {{"0 <= c < 9", "c"}, {"c >= 9", "9"}}); + compute->add_load("M", {{"0 <= c < 8", "c + 1"}, {"c >= 8", "9"}}); + compute->add_load("M", {{"0 <= c < 7", "c + 2"}, {"c >= 7", "9"}}); +/* compute->add_load("M", "min(c, 9)"); + compute->add_load("M", "min(c + 1, 9)"); + compute->add_load("M", "min(c + 2, 9)");*/ + compute->add_store("out", "c"); + return prg; +} + prog conv_1d_bc() { prog prg; prg.compute_unit_file = "accumulate_3.h"; diff --git a/conv_1d_bc.cpp b/conv_1d_bc.cpp index 8efbcd282..bf6cc916b 100644 --- a/conv_1d_bc.cpp +++ b/conv_1d_bc.cpp @@ -10,33 +10,13 @@ using namespace std; #include "hw_classes.h" -struct M_get_input_0_to_M_compute_output_3_cache { +struct M_get_input_0_merged_banks_3_cache { // RAM Box: {[0, 9]} // Capacity: 3 // # of read delays: 3 - fifo , 3> f; - inline hw_uint<32> peek(const int offset) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.peek(2 - offset); - } - - - - inline void push(const hw_uint<32> value) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.push(value); - } - -}; - -struct M_get_input_0_merged_banks_2_cache { - // RAM Box: {[0, 9]} - // Capacity: 2 - // # of read delays: 2 hw_uint<32> f0; hw_uint<32> f2; + hw_uint<32> f4; inline hw_uint<32> peek_0() { @@ -47,10 +27,18 @@ struct M_get_input_0_merged_banks_2_cache { return f2; } + inline hw_uint<32> peek_2() { + return f4; + } + inline void push(const hw_uint<32> value) { #ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 1 + f4 = f2; +#ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ // cap: 1 reading from capacity: 1 f2 = f0; @@ -61,15 +49,13 @@ struct M_get_input_0_merged_banks_2_cache { }; struct M_cache { - M_get_input_0_to_M_compute_output_3_cache M_get_input_0_to_M_compute_output_3; - M_get_input_0_merged_banks_2_cache M_get_input_0_merged_banks_2; + M_get_input_0_merged_banks_3_cache M_get_input_0_merged_banks_3; }; inline void M_get_input_0_write(hw_uint<32> & M_get_input_0, M_cache& M, int root, int p) { - M.M_get_input_0_to_M_compute_output_3.push(M_get_input_0); - M.M_get_input_0_merged_banks_2.push(M_get_input_0); + M.M_get_input_0_merged_banks_3.push(M_get_input_0); } inline hw_uint<32> M_compute_output_3_select(M_cache& M, int root, int c) { @@ -79,7 +65,7 @@ inline hw_uint<32> M_compute_output_3_select(M_cache& M, int root, int c) { // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } // DD fold: { compute_output[root, c] -> 2 : root = 0 and 0 <= c <= 7; compute_output[root, c] -> 1 : root = 0 and c = 8 } - auto value_M_get_input_0 = M.M_get_input_0_to_M_compute_output_3.peek(/* one reader or all rams */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); + auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek(/* Needs general delay string */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); return value_M_get_input_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; @@ -95,7 +81,7 @@ inline hw_uint<32> M_compute_output_4_select(M_cache& M, int root, int c) { // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } // DD fold: { compute_output[root, c] -> 1 : root = 0 and 0 <= c <= 7 } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_2.peek(/* Needs general delay string */ (7 - c >= 0) ? (1) : 0); + auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek(/* Needs general delay string */ (7 - c >= 0) ? (1) : 0); return value_M_get_input_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; @@ -111,7 +97,7 @@ inline hw_uint<32> M_compute_output_5_select(M_cache& M, int root, int c) { // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } // DD fold: { } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_2.peek_0(); + auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek_0(); return value_M_get_input_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; diff --git a/gaussian_pyramid.cpp b/gaussian_pyramid.cpp index 730ba76f6..b415c6348 100644 --- a/gaussian_pyramid.cpp +++ b/gaussian_pyramid.cpp @@ -10,7 +10,7 @@ using namespace std; #include "hw_classes.h" -struct I_store_I_from_in_2_merged_banks_9_cache { +struct I_store_I_from_in_26_merged_banks_9_cache { // RAM Box: {[0, 31], [0, 31]} // Capacity: 67 // # of read delays: 9 @@ -125,24 +125,24 @@ struct I_store_I_from_in_2_merged_banks_9_cache { }; struct I_cache { - I_store_I_from_in_2_merged_banks_9_cache I_store_I_from_in_2_merged_banks_9; + I_store_I_from_in_26_merged_banks_9_cache I_store_I_from_in_26_merged_banks_9; }; -inline void I_store_I_from_in_2_write(hw_uint<32> & I_store_I_from_in_2, I_cache& I, int root, int pr, int pc) { - I.I_store_I_from_in_2_merged_banks_9.push(I_store_I_from_in_2); +inline void I_store_I_from_in_26_write(hw_uint<32> & I_store_I_from_in_26, I_cache& I, int root, int pr, int pc) { + I.I_store_I_from_in_26_merged_banks_9.push(I_store_I_from_in_26); } -inline hw_uint<32> I_I_blr_conv_3_30_10_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_17_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_10 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_17 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 32 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 28; I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> (3 + I_blr_c) : root = 0 and I_blr_c = 29 and 0 <= I_blr_r <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_32(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 66 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_66(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -150,15 +150,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_10_select(I_cache& I, int root, int I_blr_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_11_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_18_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_11 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_18 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 2 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_2(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 65 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_65(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -166,15 +166,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_11_select(I_cache& I, int root, int I_blr_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_12_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_19_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_12 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_19 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 1 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_1(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 64 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 28; I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> (35 + I_blr_c) : root = 0 and I_blr_c = 29 and 0 <= I_blr_r <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_64(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -182,15 +182,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_12_select(I_cache& I, int root, int I_blr_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_13_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_20_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_13 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_20 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_0(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 34 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_34(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -198,15 +198,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_13_select(I_cache& I, int root, int I_blr_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_5_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_21_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_5 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_21 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 66 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_66(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 33 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_33(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -214,15 +214,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_5_select(I_cache& I, int root, int I_blr_r #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_6_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_22_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_6 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_22 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 65 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_65(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 32 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 28; I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> (3 + I_blr_c) : root = 0 and I_blr_c = 29 and 0 <= I_blr_r <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_32(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -230,15 +230,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_6_select(I_cache& I, int root, int I_blr_r #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_7_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_23_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_7 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_23 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 64 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 28; I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> (35 + I_blr_c) : root = 0 and I_blr_c = 29 and 0 <= I_blr_r <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_64(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 2 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_2(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -246,15 +246,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_7_select(I_cache& I, int root, int I_blr_r #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_8_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_24_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_8 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_24 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 34 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_34(); - return value_I_store_I_from_in_2; + // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 1 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_1(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -262,15 +262,15 @@ inline hw_uint<32> I_I_blr_conv_3_30_8_select(I_cache& I, int root, int I_blr_r #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_I_blr_conv_3_30_9_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { +inline hw_uint<32> I_I_blr_conv_3_30_25_select(I_cache& I, int root, int I_blr_r, int I_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_I_blr_conv_3_30_9 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[1 + I_blr_r, 1 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } + // I_I_blr_conv_3_30_25 read pattern: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> I[2 + I_blr_r, 2 + I_blr_c] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Read schedule : { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // Write schedule: { store_I_from_in[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 31 and 0 <= pc <= 31 } - // DD fold: { I_blr_conv_3_30[root, I_blr_r, I_blr_c] -> 33 : root = 0 and 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } - auto value_I_store_I_from_in_2 = I.I_store_I_from_in_2_merged_banks_9.peek_33(); - return value_I_store_I_from_in_2; + // DD fold: { } + auto value_I_store_I_from_in_26 = I.I_store_I_from_in_26_merged_banks_9.peek_0(); + return value_I_store_I_from_in_26; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_r = " << I_blr_r << " I_blr_c = " << I_blr_c << endl; assert(false); @@ -280,59 +280,59 @@ inline hw_uint<32> I_I_blr_conv_3_30_9_select(I_cache& I, int root, int I_blr_r // # of bundles = 2 // I_blr_conv_3_30_read -// I_I_blr_conv_3_30_5 -// I_I_blr_conv_3_30_6 -// I_I_blr_conv_3_30_7 -// I_I_blr_conv_3_30_8 -// I_I_blr_conv_3_30_9 -// I_I_blr_conv_3_30_10 -// I_I_blr_conv_3_30_11 -// I_I_blr_conv_3_30_12 -// I_I_blr_conv_3_30_13 +// I_I_blr_conv_3_30_17 +// I_I_blr_conv_3_30_18 +// I_I_blr_conv_3_30_19 +// I_I_blr_conv_3_30_20 +// I_I_blr_conv_3_30_21 +// I_I_blr_conv_3_30_22 +// I_I_blr_conv_3_30_23 +// I_I_blr_conv_3_30_24 +// I_I_blr_conv_3_30_25 inline hw_uint<288> I_I_blr_conv_3_30_read_bundle_read(I_cache& I, int root, int I_blr_r, int I_blr_c) { // # of ports in bundle: 9 - // I_I_blr_conv_3_30_5 - // I_I_blr_conv_3_30_6 - // I_I_blr_conv_3_30_7 - // I_I_blr_conv_3_30_8 - // I_I_blr_conv_3_30_9 - // I_I_blr_conv_3_30_10 - // I_I_blr_conv_3_30_11 - // I_I_blr_conv_3_30_12 - // I_I_blr_conv_3_30_13 + // I_I_blr_conv_3_30_17 + // I_I_blr_conv_3_30_18 + // I_I_blr_conv_3_30_19 + // I_I_blr_conv_3_30_20 + // I_I_blr_conv_3_30_21 + // I_I_blr_conv_3_30_22 + // I_I_blr_conv_3_30_23 + // I_I_blr_conv_3_30_24 + // I_I_blr_conv_3_30_25 hw_uint<288> result; - hw_uint<32> I_I_blr_conv_3_30_5_res = I_I_blr_conv_3_30_5_select(I, root, I_blr_r, I_blr_c); - set_at<0, 288>(result, I_I_blr_conv_3_30_5_res); - hw_uint<32> I_I_blr_conv_3_30_6_res = I_I_blr_conv_3_30_6_select(I, root, I_blr_r, I_blr_c); - set_at<32, 288>(result, I_I_blr_conv_3_30_6_res); - hw_uint<32> I_I_blr_conv_3_30_7_res = I_I_blr_conv_3_30_7_select(I, root, I_blr_r, I_blr_c); - set_at<64, 288>(result, I_I_blr_conv_3_30_7_res); - hw_uint<32> I_I_blr_conv_3_30_8_res = I_I_blr_conv_3_30_8_select(I, root, I_blr_r, I_blr_c); - set_at<96, 288>(result, I_I_blr_conv_3_30_8_res); - hw_uint<32> I_I_blr_conv_3_30_9_res = I_I_blr_conv_3_30_9_select(I, root, I_blr_r, I_blr_c); - set_at<128, 288>(result, I_I_blr_conv_3_30_9_res); - hw_uint<32> I_I_blr_conv_3_30_10_res = I_I_blr_conv_3_30_10_select(I, root, I_blr_r, I_blr_c); - set_at<160, 288>(result, I_I_blr_conv_3_30_10_res); - hw_uint<32> I_I_blr_conv_3_30_11_res = I_I_blr_conv_3_30_11_select(I, root, I_blr_r, I_blr_c); - set_at<192, 288>(result, I_I_blr_conv_3_30_11_res); - hw_uint<32> I_I_blr_conv_3_30_12_res = I_I_blr_conv_3_30_12_select(I, root, I_blr_r, I_blr_c); - set_at<224, 288>(result, I_I_blr_conv_3_30_12_res); - hw_uint<32> I_I_blr_conv_3_30_13_res = I_I_blr_conv_3_30_13_select(I, root, I_blr_r, I_blr_c); - set_at<256, 288>(result, I_I_blr_conv_3_30_13_res); + hw_uint<32> I_I_blr_conv_3_30_17_res = I_I_blr_conv_3_30_17_select(I, root, I_blr_r, I_blr_c); + set_at<0, 288>(result, I_I_blr_conv_3_30_17_res); + hw_uint<32> I_I_blr_conv_3_30_18_res = I_I_blr_conv_3_30_18_select(I, root, I_blr_r, I_blr_c); + set_at<32, 288>(result, I_I_blr_conv_3_30_18_res); + hw_uint<32> I_I_blr_conv_3_30_19_res = I_I_blr_conv_3_30_19_select(I, root, I_blr_r, I_blr_c); + set_at<64, 288>(result, I_I_blr_conv_3_30_19_res); + hw_uint<32> I_I_blr_conv_3_30_20_res = I_I_blr_conv_3_30_20_select(I, root, I_blr_r, I_blr_c); + set_at<96, 288>(result, I_I_blr_conv_3_30_20_res); + hw_uint<32> I_I_blr_conv_3_30_21_res = I_I_blr_conv_3_30_21_select(I, root, I_blr_r, I_blr_c); + set_at<128, 288>(result, I_I_blr_conv_3_30_21_res); + hw_uint<32> I_I_blr_conv_3_30_22_res = I_I_blr_conv_3_30_22_select(I, root, I_blr_r, I_blr_c); + set_at<160, 288>(result, I_I_blr_conv_3_30_22_res); + hw_uint<32> I_I_blr_conv_3_30_23_res = I_I_blr_conv_3_30_23_select(I, root, I_blr_r, I_blr_c); + set_at<192, 288>(result, I_I_blr_conv_3_30_23_res); + hw_uint<32> I_I_blr_conv_3_30_24_res = I_I_blr_conv_3_30_24_select(I, root, I_blr_r, I_blr_c); + set_at<224, 288>(result, I_I_blr_conv_3_30_24_res); + hw_uint<32> I_I_blr_conv_3_30_25_res = I_I_blr_conv_3_30_25_select(I, root, I_blr_r, I_blr_c); + set_at<256, 288>(result, I_I_blr_conv_3_30_25_res); return result; } // store_I_from_in_write -// I_store_I_from_in_2 +// I_store_I_from_in_26 inline void I_store_I_from_in_write_bundle_write(hw_uint<32>& store_I_from_in_write, I_cache& I, int root, int pr, int pc) { - hw_uint<32> I_store_I_from_in_2_res = store_I_from_in_write.extract<0, 31>(); - I_store_I_from_in_2_write(I_store_I_from_in_2_res, I, root, pr, pc); + hw_uint<32> I_store_I_from_in_26_res = store_I_from_in_write.extract<0, 31>(); + I_store_I_from_in_26_write(I_store_I_from_in_26_res, I, root, pr, pc); } #include "hw_classes.h" -struct I_blr_I_blr_conv_3_30_4_merged_banks_1_cache { +struct I_blr_I_blr_conv_3_30_16_merged_banks_1_cache { // RAM Box: {[0, 29], [0, 29]} // Capacity: 1 // # of read delays: 1 @@ -354,13 +354,13 @@ struct I_blr_I_blr_conv_3_30_4_merged_banks_1_cache { }; struct I_blr_cache { - I_blr_I_blr_conv_3_30_4_merged_banks_1_cache I_blr_I_blr_conv_3_30_4_merged_banks_1; + I_blr_I_blr_conv_3_30_16_merged_banks_1_cache I_blr_I_blr_conv_3_30_16_merged_banks_1; }; -inline void I_blr_I_blr_conv_3_30_4_write(hw_uint<32> & I_blr_I_blr_conv_3_30_4, I_blr_cache& I_blr, int root, int I_blr_r, int I_blr_c) { - I_blr.I_blr_I_blr_conv_3_30_4_merged_banks_1.push(I_blr_I_blr_conv_3_30_4); +inline void I_blr_I_blr_conv_3_30_16_write(hw_uint<32> & I_blr_I_blr_conv_3_30_16, I_blr_cache& I_blr, int root, int I_blr_r, int I_blr_c) { + I_blr.I_blr_I_blr_conv_3_30_16_merged_banks_1.push(I_blr_I_blr_conv_3_30_16); } inline hw_uint<32> I_blr_I_blr_ds_id0_1_select(I_blr_cache& I_blr, int root, int I_blr_ds_r, int I_blr_ds_c) { @@ -370,8 +370,8 @@ inline hw_uint<32> I_blr_I_blr_ds_id0_1_select(I_blr_cache& I_blr, int root, in // Read schedule : { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } // Write schedule: { I_blr_conv_3_30[root = 0, I_blr_r, I_blr_c] -> [2 + I_blr_r, 2 + I_blr_c, 1] : 0 <= I_blr_r <= 29 and 0 <= I_blr_c <= 29 } // DD fold: { } - auto value_I_blr_I_blr_conv_3_30_4 = I_blr.I_blr_I_blr_conv_3_30_4_merged_banks_1.peek(/* one reader or all rams */ 0); - return value_I_blr_I_blr_conv_3_30_4; + auto value_I_blr_I_blr_conv_3_30_16 = I_blr.I_blr_I_blr_conv_3_30_16_merged_banks_1.peek(/* one reader or all rams */ 0); + return value_I_blr_I_blr_conv_3_30_16; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_r = " << I_blr_ds_r << " I_blr_ds_c = " << I_blr_ds_c << endl; assert(false); @@ -381,10 +381,10 @@ inline hw_uint<32> I_blr_I_blr_ds_id0_1_select(I_blr_cache& I_blr, int root, in // # of bundles = 2 // I_blr_conv_3_30_write -// I_blr_I_blr_conv_3_30_4 +// I_blr_I_blr_conv_3_30_16 inline void I_blr_I_blr_conv_3_30_write_bundle_write(hw_uint<32>& I_blr_conv_3_30_write, I_blr_cache& I_blr, int root, int I_blr_r, int I_blr_c) { - hw_uint<32> I_blr_I_blr_conv_3_30_4_res = I_blr_conv_3_30_write.extract<0, 31>(); - I_blr_I_blr_conv_3_30_4_write(I_blr_I_blr_conv_3_30_4_res, I_blr, root, I_blr_r, I_blr_c); + hw_uint<32> I_blr_I_blr_conv_3_30_16_res = I_blr_conv_3_30_write.extract<0, 31>(); + I_blr_I_blr_conv_3_30_16_write(I_blr_I_blr_conv_3_30_16_res, I_blr, root, I_blr_r, I_blr_c); } // I_blr_ds_id0_read @@ -525,14 +525,14 @@ inline void I_blr_ds_I_blr_ds_id0_0_write(hw_uint<32> & I_blr_ds_I_blr_ds_id0_0, I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.push(I_blr_ds_I_blr_ds_id0_0); } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_15_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_10_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_15 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_10 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 32 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_32(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 15 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 11; I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> (3 + I_blr_ds_blr_c) : root = 0 and I_blr_ds_blr_c = 12 and 0 <= I_blr_ds_blr_r <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_15(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -541,14 +541,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_15_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_16_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_11_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_16 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_11 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 31 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_31(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 2 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_2(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -557,14 +557,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_16_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_17_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_12_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_17 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_12 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 30 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 11; I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> (18 + I_blr_ds_blr_c) : root = 0 and I_blr_ds_blr_c = 12 and 0 <= I_blr_ds_blr_r <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_30(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 1 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_1(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -573,14 +573,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_17_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_18_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_13_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_18 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_13 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 17 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_17(); + // DD fold: { } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_0(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -589,14 +589,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_18_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_19_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_5_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_19 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_5 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 16 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_16(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 32 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_32(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -605,14 +605,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_19_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_20_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_6_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_20 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_6 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 15 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 11; I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> (3 + I_blr_ds_blr_c) : root = 0 and I_blr_ds_blr_c = 12 and 0 <= I_blr_ds_blr_r <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_15(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 31 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_31(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -621,14 +621,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_20_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_21_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_7_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_21 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_7 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 2 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_2(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 30 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 11; I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> (18 + I_blr_ds_blr_c) : root = 0 and I_blr_ds_blr_c = 12 and 0 <= I_blr_ds_blr_r <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_30(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -637,14 +637,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_21_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_22_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_8_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_22 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_8 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 1 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_1(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 17 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_17(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -653,14 +653,14 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_22_select(I_blr_ds_cache& I_ #endif //__VIVADO_SYNTH__ } -inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_23_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { +inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_9_select(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_I_blr_ds_blr_conv_3_30_23 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[2 + I_blr_ds_blr_r, 2 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + // I_blr_ds_I_blr_ds_blr_conv_3_30_9 read pattern: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> I_blr_ds[1 + I_blr_ds_blr_r, 1 + I_blr_ds_blr_c] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Read schedule : { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // Write schedule: { I_blr_ds_id0[root = 0, I_blr_ds_r, I_blr_ds_c] -> [2 + 2I_blr_ds_r, 2 + 2I_blr_ds_c, 2] : 0 <= I_blr_ds_r <= 14 and 0 <= I_blr_ds_c <= 14 } - // DD fold: { } - auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_0(); + // DD fold: { I_blr_ds_blr_conv_3_30[root, I_blr_ds_blr_r, I_blr_ds_blr_c] -> 16 : root = 0 and 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } + auto value_I_blr_ds_I_blr_ds_id0_0 = I_blr_ds.I_blr_ds_I_blr_ds_id0_0_merged_banks_9.peek_16(); return value_I_blr_ds_I_blr_ds_id0_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_r = " << I_blr_ds_blr_r << " I_blr_ds_blr_c = " << I_blr_ds_blr_c << endl; @@ -671,46 +671,46 @@ inline hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_23_select(I_blr_ds_cache& I_ // # of bundles = 2 // I_blr_ds_blr_conv_3_30_read -// I_blr_ds_I_blr_ds_blr_conv_3_30_15 -// I_blr_ds_I_blr_ds_blr_conv_3_30_16 -// I_blr_ds_I_blr_ds_blr_conv_3_30_17 -// I_blr_ds_I_blr_ds_blr_conv_3_30_18 -// I_blr_ds_I_blr_ds_blr_conv_3_30_19 -// I_blr_ds_I_blr_ds_blr_conv_3_30_20 -// I_blr_ds_I_blr_ds_blr_conv_3_30_21 -// I_blr_ds_I_blr_ds_blr_conv_3_30_22 -// I_blr_ds_I_blr_ds_blr_conv_3_30_23 +// I_blr_ds_I_blr_ds_blr_conv_3_30_5 +// I_blr_ds_I_blr_ds_blr_conv_3_30_6 +// I_blr_ds_I_blr_ds_blr_conv_3_30_7 +// I_blr_ds_I_blr_ds_blr_conv_3_30_8 +// I_blr_ds_I_blr_ds_blr_conv_3_30_9 +// I_blr_ds_I_blr_ds_blr_conv_3_30_10 +// I_blr_ds_I_blr_ds_blr_conv_3_30_11 +// I_blr_ds_I_blr_ds_blr_conv_3_30_12 +// I_blr_ds_I_blr_ds_blr_conv_3_30_13 inline hw_uint<288> I_blr_ds_I_blr_ds_blr_conv_3_30_read_bundle_read(I_blr_ds_cache& I_blr_ds, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { // # of ports in bundle: 9 - // I_blr_ds_I_blr_ds_blr_conv_3_30_15 - // I_blr_ds_I_blr_ds_blr_conv_3_30_16 - // I_blr_ds_I_blr_ds_blr_conv_3_30_17 - // I_blr_ds_I_blr_ds_blr_conv_3_30_18 - // I_blr_ds_I_blr_ds_blr_conv_3_30_19 - // I_blr_ds_I_blr_ds_blr_conv_3_30_20 - // I_blr_ds_I_blr_ds_blr_conv_3_30_21 - // I_blr_ds_I_blr_ds_blr_conv_3_30_22 - // I_blr_ds_I_blr_ds_blr_conv_3_30_23 + // I_blr_ds_I_blr_ds_blr_conv_3_30_5 + // I_blr_ds_I_blr_ds_blr_conv_3_30_6 + // I_blr_ds_I_blr_ds_blr_conv_3_30_7 + // I_blr_ds_I_blr_ds_blr_conv_3_30_8 + // I_blr_ds_I_blr_ds_blr_conv_3_30_9 + // I_blr_ds_I_blr_ds_blr_conv_3_30_10 + // I_blr_ds_I_blr_ds_blr_conv_3_30_11 + // I_blr_ds_I_blr_ds_blr_conv_3_30_12 + // I_blr_ds_I_blr_ds_blr_conv_3_30_13 hw_uint<288> result; - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_15_res = I_blr_ds_I_blr_ds_blr_conv_3_30_15_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<0, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_15_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_16_res = I_blr_ds_I_blr_ds_blr_conv_3_30_16_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<32, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_16_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_17_res = I_blr_ds_I_blr_ds_blr_conv_3_30_17_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<64, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_17_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_18_res = I_blr_ds_I_blr_ds_blr_conv_3_30_18_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<96, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_18_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_19_res = I_blr_ds_I_blr_ds_blr_conv_3_30_19_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<128, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_19_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_20_res = I_blr_ds_I_blr_ds_blr_conv_3_30_20_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<160, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_20_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_21_res = I_blr_ds_I_blr_ds_blr_conv_3_30_21_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<192, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_21_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_22_res = I_blr_ds_I_blr_ds_blr_conv_3_30_22_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<224, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_22_res); - hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_23_res = I_blr_ds_I_blr_ds_blr_conv_3_30_23_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); - set_at<256, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_23_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_5_res = I_blr_ds_I_blr_ds_blr_conv_3_30_5_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<0, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_5_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_6_res = I_blr_ds_I_blr_ds_blr_conv_3_30_6_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<32, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_6_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_7_res = I_blr_ds_I_blr_ds_blr_conv_3_30_7_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<64, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_7_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_8_res = I_blr_ds_I_blr_ds_blr_conv_3_30_8_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<96, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_8_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_9_res = I_blr_ds_I_blr_ds_blr_conv_3_30_9_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<128, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_9_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_10_res = I_blr_ds_I_blr_ds_blr_conv_3_30_10_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<160, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_10_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_11_res = I_blr_ds_I_blr_ds_blr_conv_3_30_11_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<192, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_11_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_12_res = I_blr_ds_I_blr_ds_blr_conv_3_30_12_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<224, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_12_res); + hw_uint<32> I_blr_ds_I_blr_ds_blr_conv_3_30_13_res = I_blr_ds_I_blr_ds_blr_conv_3_30_13_select(I_blr_ds, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + set_at<256, 288>(result, I_blr_ds_I_blr_ds_blr_conv_3_30_13_res); return result; } @@ -723,7 +723,7 @@ inline void I_blr_ds_I_blr_ds_id0_write_bundle_write(hw_uint<32>& I_blr_ds_id0_w #include "hw_classes.h" -struct I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_cache { +struct I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_cache { // RAM Box: {[0, 12], [0, 12]} // Capacity: 39 // # of read delays: 39 @@ -745,24 +745,24 @@ struct I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id }; struct I_blr_ds_blr_cache { - I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_cache I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_25; + I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_cache I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_3; }; -inline void I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_write(hw_uint<32> & I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14, I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { - I_blr_ds_blr.I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_25.push(I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14); +inline void I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_write(hw_uint<32> & I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4, I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { + I_blr_ds_blr.I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_3.push(I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4); } -inline hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_select(I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { +inline hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_select(I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_blr_I_blr_ds_blr_ds_id0_25 read pattern: { I_blr_ds_blr_ds_id0[root = 0, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> I_blr_ds_blr[2I_blr_ds_blr_ds_r, 2I_blr_ds_blr_ds_c] : 0 <= I_blr_ds_blr_ds_r <= 5 and 0 <= I_blr_ds_blr_ds_c <= 5 } + // I_blr_ds_blr_I_blr_ds_blr_ds_id0_3 read pattern: { I_blr_ds_blr_ds_id0[root = 0, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> I_blr_ds_blr[2I_blr_ds_blr_ds_r, 2I_blr_ds_blr_ds_c] : 0 <= I_blr_ds_blr_ds_r <= 5 and 0 <= I_blr_ds_blr_ds_c <= 5 } // Read schedule : { I_blr_ds_blr_ds_id0[root = 0, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> [11 + 3I_blr_ds_blr_ds_r, 11 + 3I_blr_ds_blr_ds_c, 4] : 0 <= I_blr_ds_blr_ds_r <= 5 and 0 <= I_blr_ds_blr_ds_c <= 5 } // Write schedule: { I_blr_ds_blr_conv_3_30[root = 0, I_blr_ds_blr_r, I_blr_ds_blr_c] -> [6 + 2I_blr_ds_blr_r, 6 + 2I_blr_ds_blr_c, 3] : 0 <= I_blr_ds_blr_r <= 12 and 0 <= I_blr_ds_blr_c <= 12 } // DD fold: { I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> 2 : root = 0 and I_blr_ds_blr_ds_r = 5 and (1 + I_blr_ds_blr_ds_c) mod 2 = 0 and 0 < I_blr_ds_blr_ds_c <= 2; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> ((2 - I_blr_ds_blr_ds_c) + floor((I_blr_ds_blr_ds_c)/2)) : root = 0 and I_blr_ds_blr_ds_r = 5 and (I_blr_ds_blr_ds_c) mod 2 = 0 and 0 <= I_blr_ds_blr_ds_c <= 2; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> 1 : root = 0 and I_blr_ds_blr_ds_r = 5 and I_blr_ds_blr_ds_c = 3; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> (57/2 - 1/2 * I_blr_ds_blr_ds_c) : root = 0 and (1 + I_blr_ds_blr_ds_r) mod 2 = 0 and (1 + I_blr_ds_blr_ds_c) mod 2 = 0 and 0 < I_blr_ds_blr_ds_r <= 2 and 0 < I_blr_ds_blr_ds_c <= 5; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> (28 - 1/2 * I_blr_ds_blr_ds_c) : root = 0 and (1 + I_blr_ds_blr_ds_r) mod 2 = 0 and (I_blr_ds_blr_ds_c) mod 2 = 0 and 0 < I_blr_ds_blr_ds_r <= 2 and 0 <= I_blr_ds_blr_ds_c <= 5; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> ((38 - 13/2 * I_blr_ds_blr_ds_r) - 2 * I_blr_ds_blr_ds_c) : root = 0 and (I_blr_ds_blr_ds_r) mod 2 = 0 and 0 <= I_blr_ds_blr_ds_r <= 2 and 0 <= I_blr_ds_blr_ds_c <= 5; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> (31/2 - 1/2 * I_blr_ds_blr_ds_c) : root = 0 and I_blr_ds_blr_ds_r = 3 and (1 + I_blr_ds_blr_ds_c) mod 2 = 0 and 0 < I_blr_ds_blr_ds_c <= 5; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> (15 - 1/2 * I_blr_ds_blr_ds_c) : root = 0 and I_blr_ds_blr_ds_r = 3 and (I_blr_ds_blr_ds_c) mod 2 = 0 and 0 <= I_blr_ds_blr_ds_c <= 5; I_blr_ds_blr_ds_id0[root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> (12 - 2 * I_blr_ds_blr_ds_c) : I_blr_ds_blr_ds_r = 4 and root = 0 and 0 <= I_blr_ds_blr_ds_c <= 5 and ((root = 0 and (1 + I_blr_ds_blr_ds_c) mod 2 = 0) or (root = 0 and (I_blr_ds_blr_ds_c) mod 2 = 0)) } - auto value_I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14 = I_blr_ds_blr.I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_25.peek(/* one reader or all rams */ (-3 + I_blr_ds_blr_ds_c == 0 && -5 + I_blr_ds_blr_ds_r == 0) ? (1) : (-1 + I_blr_ds_blr_ds_c == 0 && -5 + I_blr_ds_blr_ds_r == 0) ? (2) : (-4 + I_blr_ds_blr_ds_r == 0) ? ((12 - 2 * I_blr_ds_blr_ds_c)) : ((-I_blr_ds_blr_ds_r) % 2 == 0 && 2 - I_blr_ds_blr_ds_r >= 0) ? ((((76 - 13 * I_blr_ds_blr_ds_r) - 4 * I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -5 + I_blr_ds_blr_ds_r == 0 && 2 - I_blr_ds_blr_ds_c >= 0) ? (((4 - I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -3 + I_blr_ds_blr_ds_r == 0) ? (((30 - I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -1 + I_blr_ds_blr_ds_r == 0) ? (((56 - I_blr_ds_blr_ds_c))/2) : ((-1 - I_blr_ds_blr_ds_c) % 2 == 0 && -3 + I_blr_ds_blr_ds_r == 0) ? (((31 - I_blr_ds_blr_ds_c))/2) : ((-1 - I_blr_ds_blr_ds_c) % 2 == 0 && -1 + I_blr_ds_blr_ds_r == 0) ? (((57 - I_blr_ds_blr_ds_c))/2) : 0); - return value_I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14; + auto value_I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4 = I_blr_ds_blr.I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_to_I_blr_ds_blr_I_blr_ds_blr_ds_id0_3.peek(/* one reader or all rams */ (-3 + I_blr_ds_blr_ds_c == 0 && -5 + I_blr_ds_blr_ds_r == 0) ? (1) : (-1 + I_blr_ds_blr_ds_c == 0 && -5 + I_blr_ds_blr_ds_r == 0) ? (2) : (-4 + I_blr_ds_blr_ds_r == 0) ? ((12 - 2 * I_blr_ds_blr_ds_c)) : ((-I_blr_ds_blr_ds_r) % 2 == 0 && 2 - I_blr_ds_blr_ds_r >= 0) ? ((((76 - 13 * I_blr_ds_blr_ds_r) - 4 * I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -5 + I_blr_ds_blr_ds_r == 0 && 2 - I_blr_ds_blr_ds_c >= 0) ? (((4 - I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -3 + I_blr_ds_blr_ds_r == 0) ? (((30 - I_blr_ds_blr_ds_c))/2) : ((-I_blr_ds_blr_ds_c) % 2 == 0 && -1 + I_blr_ds_blr_ds_r == 0) ? (((56 - I_blr_ds_blr_ds_c))/2) : ((-1 - I_blr_ds_blr_ds_c) % 2 == 0 && -3 + I_blr_ds_blr_ds_r == 0) ? (((31 - I_blr_ds_blr_ds_c))/2) : ((-1 - I_blr_ds_blr_ds_c) % 2 == 0 && -1 + I_blr_ds_blr_ds_r == 0) ? (((57 - I_blr_ds_blr_ds_c))/2) : 0); + return value_I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_ds_r = " << I_blr_ds_blr_ds_r << " I_blr_ds_blr_ds_c = " << I_blr_ds_blr_ds_c << endl; assert(false); @@ -772,27 +772,27 @@ inline hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_select(I_blr_ds_blr_cach // # of bundles = 2 // I_blr_ds_blr_conv_3_30_write -// I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14 +// I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4 inline void I_blr_ds_blr_I_blr_ds_blr_conv_3_30_write_bundle_write(hw_uint<32>& I_blr_ds_blr_conv_3_30_write, I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_r, int I_blr_ds_blr_c) { - hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_res = I_blr_ds_blr_conv_3_30_write.extract<0, 31>(); - I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_write(I_blr_ds_blr_I_blr_ds_blr_conv_3_30_14_res, I_blr_ds_blr, root, I_blr_ds_blr_r, I_blr_ds_blr_c); + hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_res = I_blr_ds_blr_conv_3_30_write.extract<0, 31>(); + I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_write(I_blr_ds_blr_I_blr_ds_blr_conv_3_30_4_res, I_blr_ds_blr, root, I_blr_ds_blr_r, I_blr_ds_blr_c); } // I_blr_ds_blr_ds_id0_read -// I_blr_ds_blr_I_blr_ds_blr_ds_id0_25 +// I_blr_ds_blr_I_blr_ds_blr_ds_id0_3 inline hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_read_bundle_read(I_blr_ds_blr_cache& I_blr_ds_blr, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { // # of ports in bundle: 1 - // I_blr_ds_blr_I_blr_ds_blr_ds_id0_25 + // I_blr_ds_blr_I_blr_ds_blr_ds_id0_3 hw_uint<32> result; - hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_res = I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_select(I_blr_ds_blr, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); - set_at<0, 32>(result, I_blr_ds_blr_I_blr_ds_blr_ds_id0_25_res); + hw_uint<32> I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_res = I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_select(I_blr_ds_blr, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); + set_at<0, 32>(result, I_blr_ds_blr_I_blr_ds_blr_ds_id0_3_res); return result; } #include "hw_classes.h" -struct I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_cache { +struct I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_cache { // RAM Box: {[0, 5], [0, 5]} // Capacity: 12 // # of read delays: 12 @@ -814,24 +814,24 @@ struct I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_ }; struct I_blr_ds_blr_ds_cache { - I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_cache I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27; + I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_cache I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15; }; -inline void I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_write(hw_uint<32> & I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24, I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { - I_blr_ds_blr_ds.I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27.push(I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24); +inline void I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_write(hw_uint<32> & I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2, I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { + I_blr_ds_blr_ds.I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15.push(I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2); } -inline hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_select(I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_out_r, int I_blr_ds_blr_ds_out_c) { +inline hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_select(I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_out_r, int I_blr_ds_blr_ds_out_c) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27 read pattern: { store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root = 0, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> I_blr_ds_blr_ds[I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] : 0 <= I_blr_ds_blr_ds_out_r <= 5 and 0 <= I_blr_ds_blr_ds_out_c <= 5 } + // I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15 read pattern: { store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root = 0, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> I_blr_ds_blr_ds[I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] : 0 <= I_blr_ds_blr_ds_out_r <= 5 and 0 <= I_blr_ds_blr_ds_out_c <= 5 } // Read schedule : { store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root = 0, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> [16 + 2I_blr_ds_blr_ds_out_r, 16 + 2I_blr_ds_blr_ds_out_c, 5] : 0 <= I_blr_ds_blr_ds_out_r <= 5 and 0 <= I_blr_ds_blr_ds_out_c <= 5 } // Write schedule: { I_blr_ds_blr_ds_id0[root = 0, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c] -> [11 + 3I_blr_ds_blr_ds_r, 11 + 3I_blr_ds_blr_ds_c, 4] : 0 <= I_blr_ds_blr_ds_r <= 5 and 0 <= I_blr_ds_blr_ds_c <= 5 } // DD fold: { store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> 1 : root = 0 and I_blr_ds_blr_ds_out_r = 5 and 0 <= I_blr_ds_blr_ds_out_c <= 1; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> 1 : root = 0 and I_blr_ds_blr_ds_out_r = 5 and I_blr_ds_blr_ds_out_c = 2; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> (11 - I_blr_ds_blr_ds_out_c) : root = 0 and 0 <= I_blr_ds_blr_ds_out_r <= 1 and 0 <= I_blr_ds_blr_ds_out_c <= 4; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> 6 : root = 0 and I_blr_ds_blr_ds_out_c = 5 and 0 <= I_blr_ds_blr_ds_out_r <= 1; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> 7 : root = 0 and I_blr_ds_blr_ds_out_r = 2 and (1 + I_blr_ds_blr_ds_out_c) mod 3 = 0 and 2 <= I_blr_ds_blr_ds_out_c <= 4; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> (8/3 + 2/3 * I_blr_ds_blr_ds_out_c) : root = 0 and I_blr_ds_blr_ds_out_r = 2 and I_blr_ds_blr_ds_out_c = 5; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> (7 - floor((1 + I_blr_ds_blr_ds_out_c)/3)) : root = 0 and I_blr_ds_blr_ds_out_r = 2 and 0 <= I_blr_ds_blr_ds_out_c <= 4 and 3*floor((1 + I_blr_ds_blr_ds_out_c)/3) <= I_blr_ds_blr_ds_out_c; store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds[root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c] -> (5 - I_blr_ds_blr_ds_out_c) : root = 0 and 3 <= I_blr_ds_blr_ds_out_r <= 4 and 0 <= I_blr_ds_blr_ds_out_c <= 4 and ((3*floor((1 + I_blr_ds_blr_ds_out_c)/3) <= I_blr_ds_blr_ds_out_c) or (1 + I_blr_ds_blr_ds_out_c) mod 3 = 0) } - auto value_I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24 = I_blr_ds_blr_ds.I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27.peek(/* one reader or all rams */ (-5 + I_blr_ds_blr_ds_out_r == 0 && 2 - I_blr_ds_blr_ds_out_c >= 0) ? (1) : (-5 + I_blr_ds_blr_ds_out_c == 0 && 1 - I_blr_ds_blr_ds_out_r >= 0) ? (6) : (-2 + I_blr_ds_blr_ds_out_c == 0 && -2 + I_blr_ds_blr_ds_out_r == 0) ? (7) : ((-3 + I_blr_ds_blr_ds_out_r >= 0 && 4 - I_blr_ds_blr_ds_out_r >= 0 && I_blr_ds_blr_ds_out_c - 3*floord(1 + I_blr_ds_blr_ds_out_c, 3) >= 0) || (-2 + I_blr_ds_blr_ds_out_c == 0 && -3 + I_blr_ds_blr_ds_out_r >= 0 && 4 - I_blr_ds_blr_ds_out_r >= 0)) ? ((5 - I_blr_ds_blr_ds_out_c)) : (4 - I_blr_ds_blr_ds_out_c >= 0 && 1 - I_blr_ds_blr_ds_out_r >= 0) ? ((11 - I_blr_ds_blr_ds_out_c)) : (-5 + I_blr_ds_blr_ds_out_c == 0 && -2 + I_blr_ds_blr_ds_out_r == 0) ? (6) : (-2 + I_blr_ds_blr_ds_out_r == 0 && I_blr_ds_blr_ds_out_c - 3*floord(1 + I_blr_ds_blr_ds_out_c, 3) >= 0) ? ((7 - floord(1 + I_blr_ds_blr_ds_out_c, 3))) : 0); - return value_I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24; + auto value_I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2 = I_blr_ds_blr_ds.I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_to_I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15.peek(/* one reader or all rams */ (-5 + I_blr_ds_blr_ds_out_r == 0 && 2 - I_blr_ds_blr_ds_out_c >= 0) ? (1) : (-5 + I_blr_ds_blr_ds_out_c == 0 && 1 - I_blr_ds_blr_ds_out_r >= 0) ? (6) : (-2 + I_blr_ds_blr_ds_out_c == 0 && -2 + I_blr_ds_blr_ds_out_r == 0) ? (7) : ((-3 + I_blr_ds_blr_ds_out_r >= 0 && 4 - I_blr_ds_blr_ds_out_r >= 0 && I_blr_ds_blr_ds_out_c - 3*floord(1 + I_blr_ds_blr_ds_out_c, 3) >= 0) || (-2 + I_blr_ds_blr_ds_out_c == 0 && -3 + I_blr_ds_blr_ds_out_r >= 0 && 4 - I_blr_ds_blr_ds_out_r >= 0)) ? ((5 - I_blr_ds_blr_ds_out_c)) : (4 - I_blr_ds_blr_ds_out_c >= 0 && 1 - I_blr_ds_blr_ds_out_r >= 0) ? ((11 - I_blr_ds_blr_ds_out_c)) : (-5 + I_blr_ds_blr_ds_out_c == 0 && -2 + I_blr_ds_blr_ds_out_r == 0) ? (6) : (-2 + I_blr_ds_blr_ds_out_r == 0 && I_blr_ds_blr_ds_out_c - 3*floord(1 + I_blr_ds_blr_ds_out_c, 3) >= 0) ? ((7 - floord(1 + I_blr_ds_blr_ds_out_c, 3))) : 0); + return value_I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " I_blr_ds_blr_ds_out_r = " << I_blr_ds_blr_ds_out_r << " I_blr_ds_blr_ds_out_c = " << I_blr_ds_blr_ds_out_c << endl; assert(false); @@ -841,21 +841,21 @@ inline hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ // # of bundles = 2 // I_blr_ds_blr_ds_id0_write -// I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24 +// I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2 inline void I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_write_bundle_write(hw_uint<32>& I_blr_ds_blr_ds_id0_write, I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { - hw_uint<32> I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_res = I_blr_ds_blr_ds_id0_write.extract<0, 31>(); - I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_write(I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_24_res, I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); + hw_uint<32> I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_res = I_blr_ds_blr_ds_id0_write.extract<0, 31>(); + I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_write(I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_2_res, I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); } // store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_read -// I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27 +// I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15 inline hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_read_bundle_read(I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_out_r, int I_blr_ds_blr_ds_out_c) { // # of ports in bundle: 1 - // I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27 + // I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15 hw_uint<32> result; - hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_res = I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_select(I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c); - set_at<0, 32>(result, I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_27_res); + hw_uint<32> I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_res = I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_select(I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c); + set_at<0, 32>(result, I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_15_res); return result; } @@ -878,27 +878,16 @@ inline void I_blr_ds_id0(I_blr_cache& I_blr, I_blr_ds_cache& I_blr_ds, int root, } -inline void store_I_from_in(HWStream >& /* buffer_args num ports = 1 */in, I_cache& I, int root, int pr, int pc) { - // Consume: in - auto in_pr_c__pc_value = in.read(); - // Produce: I - I_store_I_from_in_write_bundle_write(in_pr_c__pc_value, I, root, pr, pc); - -#ifndef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - -} - -inline void I_blr_conv_3_30(I_cache& I, I_blr_cache& I_blr, int root, int I_blr_r, int I_blr_c) { - // Consume: I - auto I_I_blr_r__p__0_c__I_blr_c__p__0_value = I_I_blr_conv_3_30_read_bundle_read(I/* source_delay */, root, I_blr_r, I_blr_c); +inline void I_blr_ds_blr_ds_id0(I_blr_ds_blr_cache& I_blr_ds_blr, I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { + // Consume: I_blr_ds_blr + auto I_blr_ds_blr_2_m_I_blr_ds_blr_ds_r_c__2_m_I_blr_ds_blr_ds_c_value = I_blr_ds_blr_I_blr_ds_blr_ds_id0_read_bundle_read(I_blr_ds_blr/* source_delay */, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - auto compute_result = conv_3_3(I_I_blr_r__p__0_c__I_blr_c__p__0_value); - // Produce: I_blr - I_blr_I_blr_conv_3_30_write_bundle_write(compute_result, I_blr, root, I_blr_r, I_blr_c); + auto compute_result = id(I_blr_ds_blr_2_m_I_blr_ds_blr_ds_r_c__2_m_I_blr_ds_blr_ds_c_value); + // Produce: I_blr_ds_blr_ds + I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_write_bundle_write(compute_result, I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ @@ -921,31 +910,42 @@ inline void I_blr_ds_blr_conv_3_30(I_blr_ds_cache& I_blr_ds, I_blr_ds_blr_cache& } -inline void I_blr_ds_blr_ds_id0(I_blr_ds_blr_cache& I_blr_ds_blr, I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, int root, int I_blr_ds_blr_ds_r, int I_blr_ds_blr_ds_c) { - // Consume: I_blr_ds_blr - auto I_blr_ds_blr_2_m_I_blr_ds_blr_ds_r_c__2_m_I_blr_ds_blr_ds_c_value = I_blr_ds_blr_I_blr_ds_blr_ds_id0_read_bundle_read(I_blr_ds_blr/* source_delay */, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); +inline void store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds(I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, HWStream >& /* buffer_args num ports = 1 */I_blr_ds_blr_ds_out, int root, int I_blr_ds_blr_ds_out_r, int I_blr_ds_blr_ds_out_c) { + // Consume: I_blr_ds_blr_ds + auto I_blr_ds_blr_ds_I_blr_ds_blr_ds_out_r_c__I_blr_ds_blr_ds_out_c_value = I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_read_bundle_read(I_blr_ds_blr_ds/* source_delay */, root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - auto compute_result = id(I_blr_ds_blr_2_m_I_blr_ds_blr_ds_r_c__2_m_I_blr_ds_blr_ds_c_value); - // Produce: I_blr_ds_blr_ds - I_blr_ds_blr_ds_I_blr_ds_blr_ds_id0_write_bundle_write(compute_result, I_blr_ds_blr_ds, root, I_blr_ds_blr_ds_r, I_blr_ds_blr_ds_c); + // Produce: I_blr_ds_blr_ds_out + I_blr_ds_blr_ds_out.write(I_blr_ds_blr_ds_I_blr_ds_blr_ds_out_r_c__I_blr_ds_blr_ds_out_c_value); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ } -inline void store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds(I_blr_ds_blr_ds_cache& I_blr_ds_blr_ds, HWStream >& /* buffer_args num ports = 1 */I_blr_ds_blr_ds_out, int root, int I_blr_ds_blr_ds_out_r, int I_blr_ds_blr_ds_out_c) { - // Consume: I_blr_ds_blr_ds - auto I_blr_ds_blr_ds_I_blr_ds_blr_ds_out_r_c__I_blr_ds_blr_ds_out_c_value = I_blr_ds_blr_ds_store_I_blr_ds_blr_ds_out_from_I_blr_ds_blr_ds_read_bundle_read(I_blr_ds_blr_ds/* source_delay */, root, I_blr_ds_blr_ds_out_r, I_blr_ds_blr_ds_out_c); +inline void I_blr_conv_3_30(I_cache& I, I_blr_cache& I_blr, int root, int I_blr_r, int I_blr_c) { + // Consume: I + auto I_I_blr_r__p__0_c__I_blr_c__p__0_value = I_I_blr_conv_3_30_read_bundle_read(I/* source_delay */, root, I_blr_r, I_blr_c); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ - // Produce: I_blr_ds_blr_ds_out - I_blr_ds_blr_ds_out.write(I_blr_ds_blr_ds_I_blr_ds_blr_ds_out_r_c__I_blr_ds_blr_ds_out_c_value); + auto compute_result = conv_3_3(I_I_blr_r__p__0_c__I_blr_c__p__0_value); + // Produce: I_blr + I_blr_I_blr_conv_3_30_write_bundle_write(compute_result, I_blr, root, I_blr_r, I_blr_c); + +#ifndef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + +} + +inline void store_I_from_in(HWStream >& /* buffer_args num ports = 1 */in, I_cache& I, int root, int pr, int pc) { + // Consume: in + auto in_pr_c__pc_value = in.read(); + // Produce: I + I_store_I_from_in_write_bundle_write(in_pr_c__pc_value, I, root, pr, pc); #ifndef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ diff --git a/ubuffer.cpp b/ubuffer.cpp index 9b95630e8..2757c56a7 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1167,7 +1167,7 @@ void UBuffer::merge_bank(CodegenOptions& options, string inpt, vector merged.maxdelay) { @@ -1272,12 +1272,34 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { for (auto inpt : get_in_ports()) { // try to turn the banks for this inpt into one big linebuffer vector receivers = receiver_banks(inpt); - //cout << "Receiver banks for " << inpt << endl; + cout << "Receiver banks for " << inpt << endl; vector mergeable; for (auto bnk : receivers) { - //cout << tab(1) << bnk.name << ", # read offsets: " << bnk.read_delays.size() << endl; - //cout << tab(2) << "# receivers: " << receivers.size() << endl; - +cout<<"============================================"< 0) { merge_bank(options, inpt, mergeable); auto banks = get_banks(); - //cout << "finished create bank!" << endl; - //for (bank bk : banks) { - //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; - //cout << tab(1); - //for (int dl: bk.read_delays) { - //cout << dl << "," ; - //} - //cout << endl; - //for (auto dl: bk.delay_map) { - //cout < Date: Wed, 3 Jun 2020 15:40:18 -0700 Subject: [PATCH 02/33] trying bank splitting --- conv_1d_bc.cpp | 39 +++++++++++++++++++++++------ ubuffer.cpp | 68 +++++++++++++++++++++++++++++--------------------- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/conv_1d_bc.cpp b/conv_1d_bc.cpp index bf6cc916b..f706348a8 100644 --- a/conv_1d_bc.cpp +++ b/conv_1d_bc.cpp @@ -10,30 +10,51 @@ using namespace std; #include "hw_classes.h" +struct M_get_input_0_to_M_compute_output_3_cache { + // RAM Box: {[0, 9]} + // Capacity: 3 + // # of read delays: 3 + fifo , 3> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(2 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + struct M_get_input_0_merged_banks_3_cache { // RAM Box: {[0, 9]} // Capacity: 3 // # of read delays: 3 - hw_uint<32> f0; - hw_uint<32> f2; - hw_uint<32> f4; + f0; + f2; + f4; - inline hw_uint<32> peek_0() { + inline peek_0() { return f0; } - inline hw_uint<32> peek_1() { + inline peek_1() { return f2; } - inline hw_uint<32> peek_2() { + inline peek_2() { return f4; } - inline void push(const hw_uint<32> value) { + inline void push(const value) { #ifdef __VIVADO_SYNTH__ #endif //__VIVADO_SYNTH__ // cap: 1 reading from capacity: 1 @@ -49,12 +70,14 @@ struct M_get_input_0_merged_banks_3_cache { }; struct M_cache { + M_get_input_0_to_M_compute_output_3_cache M_get_input_0_to_M_compute_output_3; M_get_input_0_merged_banks_3_cache M_get_input_0_merged_banks_3; }; inline void M_get_input_0_write(hw_uint<32> & M_get_input_0, M_cache& M, int root, int p) { + M.M_get_input_0_to_M_compute_output_3.push(M_get_input_0); M.M_get_input_0_merged_banks_3.push(M_get_input_0); } @@ -65,7 +88,7 @@ inline hw_uint<32> M_compute_output_3_select(M_cache& M, int root, int c) { // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } // DD fold: { compute_output[root, c] -> 2 : root = 0 and 0 <= c <= 7; compute_output[root, c] -> 1 : root = 0 and c = 8 } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek(/* Needs general delay string */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); + auto value_M_get_input_0 = M.M_get_input_0_to_M_compute_output_3.peek(/* one reader or all rams */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); return value_M_get_input_0; #ifndef __VIVADO_SYNTH__ cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; diff --git a/ubuffer.cpp b/ubuffer.cpp index 2757c56a7..f60e506b2 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1278,37 +1278,47 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { cout<<"============================================"< 0) { From 81c16331aaf271e5f3bef559ba91bfca50cff83c Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Wed, 3 Jun 2020 16:12:45 -0700 Subject: [PATCH 03/33] unique names for banks --- ubuffer.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index f60e506b2..05274fb8f 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1167,7 +1167,7 @@ void UBuffer::merge_bank(CodegenOptions& options, string inpt, vector merged.maxdelay) { @@ -1269,33 +1269,37 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { } } + int counter = 0; + for (auto inpt : get_in_ports()) { // try to turn the banks for this inpt into one big linebuffer vector receivers = receiver_banks(inpt); cout << "Receiver banks for " << inpt << endl; vector mergeable; + for (auto bnk : receivers) { cout<<"============================================"< 0) { +cout << "mergeable size is greater than 0" << endl; +cout << "inpt "< Date: Wed, 3 Jun 2020 18:56:29 -0700 Subject: [PATCH 04/33] merging is happening, but need to mux --- conv_1d_bc.cpp | 262 +------------------------------------------------ ubuffer.cpp | 24 ++++- utils.h | 1 + 3 files changed, 21 insertions(+), 266 deletions(-) diff --git a/conv_1d_bc.cpp b/conv_1d_bc.cpp index f706348a8..5711af6d9 100644 --- a/conv_1d_bc.cpp +++ b/conv_1d_bc.cpp @@ -10,264 +10,4 @@ using namespace std; #include "hw_classes.h" -struct M_get_input_0_to_M_compute_output_3_cache { - // RAM Box: {[0, 9]} - // Capacity: 3 - // # of read delays: 3 - fifo , 3> f; - inline hw_uint<32> peek(const int offset) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.peek(2 - offset); - } - - - - inline void push(const hw_uint<32> value) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - return f.push(value); - } - -}; - -struct M_get_input_0_merged_banks_3_cache { - // RAM Box: {[0, 9]} - // Capacity: 3 - // # of read delays: 3 - f0; - f2; - f4; - - - inline peek_0() { - return f0; - } - - inline peek_1() { - return f2; - } - - inline peek_2() { - return f4; - } - - - - inline void push(const value) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // cap: 1 reading from capacity: 1 - f4 = f2; -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // cap: 1 reading from capacity: 1 - f2 = f0; - // cap: 1 - f0 = value; - } - -}; - -struct M_cache { - M_get_input_0_to_M_compute_output_3_cache M_get_input_0_to_M_compute_output_3; - M_get_input_0_merged_banks_3_cache M_get_input_0_merged_banks_3; -}; - - - -inline void M_get_input_0_write(hw_uint<32> & M_get_input_0, M_cache& M, int root, int p) { - M.M_get_input_0_to_M_compute_output_3.push(M_get_input_0); - M.M_get_input_0_merged_banks_3.push(M_get_input_0); -} - -inline hw_uint<32> M_compute_output_3_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_3 read pattern: { compute_output[root = 0, c] -> M[c] : 0 <= c <= 8; compute_output[root = 0, c = 9] -> M[9] } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { compute_output[root, c] -> 2 : root = 0 and 0 <= c <= 7; compute_output[root, c] -> 1 : root = 0 and c = 8 } - auto value_M_get_input_0 = M.M_get_input_0_to_M_compute_output_3.peek(/* one reader or all rams */ (-8 + c == 0) ? (1) : (7 - c >= 0) ? (2) : 0); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -inline hw_uint<32> M_compute_output_4_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_4 read pattern: { compute_output[root = 0, c] -> M[9] : 8 <= c <= 9; compute_output[root = 0, c] -> M[1 + c] : 0 <= c <= 7 } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { compute_output[root, c] -> 1 : root = 0 and 0 <= c <= 7 } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek(/* Needs general delay string */ (7 - c >= 0) ? (1) : 0); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -inline hw_uint<32> M_compute_output_5_select(M_cache& M, int root, int c) { -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - // M_compute_output_5 read pattern: { compute_output[root = 0, c] -> M[9] : 7 <= c <= 9; compute_output[root = 0, c] -> M[2 + c] : 0 <= c <= 6 } - // Read schedule : { compute_output[root = 0, c] -> [2 + c, 1] : 0 <= c <= 9 } - // Write schedule: { get_input[root = 0, p] -> [p, 0] : 0 <= p <= 9 } - // DD fold: { } - auto value_M_get_input_0 = M.M_get_input_0_merged_banks_3.peek_0(); - return value_M_get_input_0; -#ifndef __VIVADO_SYNTH__ - cout << "Error: Unsupported offsets: " << " root = " << root << " c = " << c << endl; - assert(false); - return 0; -#endif //__VIVADO_SYNTH__ -} - -// # of bundles = 3 -// M_get_input_0 -// M_get_input_0 -inline void M_M_get_input_0_bundle_write(hw_uint<32>& M_get_input_0, M_cache& M, int root, int p) { - hw_uint<32> M_get_input_0_res = M_get_input_0.extract<0, 31>(); - M_get_input_0_write(M_get_input_0_res, M, root, p); -} - -// compute_output_read -// M_compute_output_3 -// M_compute_output_4 -// M_compute_output_5 -inline hw_uint<96> M_compute_output_read_bundle_read(M_cache& M, int root, int c) { - // # of ports in bundle: 3 - // M_compute_output_3 - // M_compute_output_4 - // M_compute_output_5 - - hw_uint<96> result; - hw_uint<32> M_compute_output_3_res = M_compute_output_3_select(M, root, c); - set_at<0, 96>(result, M_compute_output_3_res); - hw_uint<32> M_compute_output_4_res = M_compute_output_4_select(M, root, c); - set_at<32, 96>(result, M_compute_output_4_res); - hw_uint<32> M_compute_output_5_res = M_compute_output_5_select(M, root, c); - set_at<64, 96>(result, M_compute_output_5_res); - return result; -} - -// get_input_write -// M_get_input_0 -inline void M_get_input_write_bundle_write(hw_uint<32>& get_input_write, M_cache& M, int root, int p) { - hw_uint<32> M_get_input_0_res = get_input_write.extract<0, 31>(); - M_get_input_0_write(M_get_input_0_res, M, root, p); -} - - - -// Operation logic -inline void get_input(HWStream >& /* buffer_args num ports = 1 */in, M_cache& M, int root, int p) { - // Consume: in - auto in_p_value = in.read(); - // Produce: M - M_get_input_write_bundle_write(in_p_value, M, root, p); - -#ifndef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - -} - -inline void compute_output(HWStream >& /* buffer_args num ports = 1 */out, int root, int c) { - auto compute_result = accumulate_3(); - // Produce: out - out.write(compute_result); - -#ifndef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ - -} - -// Driver function -void conv_1d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out, int num_epochs) { - -#ifndef __VIVADO_SYNTH__ - ofstream debug_file("conv_1d_bc_debug.csv"); - global_debug_handle = &debug_file; -#endif //__VIVADO_SYNTH__ - M_cache M; -#ifdef __VIVADO_SYNTH__ -#endif //__VIVADO_SYNTH__ -#ifdef __VIVADO_SYNTH__ -#pragma HLS inline recursive -#endif // __VIVADO_SYNTH__ - - for (int epoch = 0; epoch < num_epochs; epoch++) { - for (int c0 = 0; c0 <= 11; c0 += 1) { - if (c0 <= 9) - get_input(in, M, 0, c0); - if (c0 >= 2) - compute_output(M, out, 0, c0 - 2); - } - - } - -#ifndef __VIVADO_SYNTH__ - debug_file.close(); -#endif //__VIVADO_SYNTH__ -} - -void conv_1d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out) { - - conv_1d_bc(in, out, 1); -} -#ifdef __VIVADO_SYNTH__ -const int get_input_read_num_transfers = 0; -const int compute_output_write_num_transfers = 0; - - -extern "C" { - -static void read_get_input_read(hw_uint<32>* input, HWStream >& v, const int size) { - hw_uint<32> burst_reg; - int num_transfers = get_input_read_num_transfers*size; - for (int i = 0; i < num_transfers; i++) { - #pragma HLS pipeline II=1 - burst_reg = input[i]; - v.write(burst_reg); - } -} - -static void write_compute_output_write(hw_uint<32>* output, HWStream >& v, const int size) { - hw_uint<32> burst_reg; - int num_transfers = compute_output_write_num_transfers*size; - for (int i = 0; i < num_transfers; i++) { - #pragma HLS pipeline II=1 - burst_reg = v.read(); - output[i] = burst_reg; - } -} - -void conv_1d_bc_accel(hw_uint<32>* get_input_read, hw_uint<32>* compute_output_write, const int size) { -#pragma HLS dataflow -#pragma HLS INTERFACE m_axi port = get_input_read offset = slave depth = 65536 bundle = gmem0 -#pragma HLS INTERFACE m_axi port = compute_output_write offset = slave depth = 65536 bundle = gmem1 - -#pragma HLS INTERFACE s_axilite port = get_input_read bundle = control -#pragma HLS INTERFACE s_axilite port = compute_output_write bundle = control -#pragma HLS INTERFACE s_axilite port = size bundle = control -#pragma HLS INTERFACE s_axilite port = return bundle = control - - static HWStream > get_input_read_channel; - static HWStream > compute_output_write_channel; - - read_get_input_read(get_input_read, get_input_read_channel, size); - - conv_1d_bc(get_input_read_channel, compute_output_write_channel, size); - - write_compute_output_write(compute_output_write, compute_output_write_channel, size); -} - -} -#endif //__VIVADO_SYNTH__ - +struct M_get_input_0_split_banks2_0_cache { diff --git a/ubuffer.cpp b/ubuffer.cpp index 05274fb8f..221175dd8 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1162,9 +1162,11 @@ void UBuffer::merge_bank(CodegenOptions& options, string inpt, vector Date: Wed, 3 Jun 2020 19:26:46 -0700 Subject: [PATCH 05/33] print --- ubuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 221175dd8..39f396ccb 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1319,7 +1319,7 @@ cout<<"============================================"< Date: Fri, 5 Jun 2020 19:19:28 -0700 Subject: [PATCH 06/33] remove conv 1d file --- conv_1d_bc.cpp | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 conv_1d_bc.cpp diff --git a/conv_1d_bc.cpp b/conv_1d_bc.cpp deleted file mode 100644 index 5711af6d9..000000000 --- a/conv_1d_bc.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __VIVADO_SYNTH__ -#include -using namespace std; - - // Debug utility - ofstream* global_debug_handle; - -#endif //__VIVADO_SYNTH__ -#include "accumulate_3.h" - -#include "hw_classes.h" - -struct M_get_input_0_split_banks2_0_cache { From ef097037f925f55e66bf76c0e267de77fc72b306 Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Fri, 5 Jun 2020 19:38:35 -0700 Subject: [PATCH 07/33] changing stack_banks data structure --- ubuffer.cpp | 34 +++++++++----- ubuffer.h | 127 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 121 insertions(+), 40 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 39f396ccb..13f86a328 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -380,11 +380,13 @@ void generate_vivado_tcl(UBuffer& buf) { void UBuffer::generate_coreir(CodegenOptions& options, CoreIR::ModuleDef* def) { auto context = def->getContext(); - for (auto it : stack_banks) { - auto connection = it.first; - auto bk = it.second; + //for (auto it : stack_banks) { + //auto connection = it.first; + // note that the second in stack_banks pair is now a vector, so below + // statement would need to be changed for print + //auto bk = it.second; //cout << "[inpt: " << connection.first << "] -> [bk: " << bk.name << "] -> [outpt:" << connection.second << "]\n"; - } + //} //map save the register map wire2out; @@ -523,9 +525,11 @@ void generate_code_prefix(CodegenOptions& options, concat(args, dimension_var_decls(inpt, buf)); string var_args = comma_list(dimension_var_args(inpt, buf)); + // write func for every input port that gets called in this bundle out << "inline void " << inpt << "_write("; out << comma_list(args) << ") {" << endl; + // copy and broadcast whenever write to port is done //Different ram type, different address for (auto sb : buf.receiver_banks(inpt)) { //if (sb.tp == BANK_TYPE_STACK) { @@ -686,9 +690,12 @@ selector generate_select(CodegenOptions& options, std::ostream& out, const strin map in_ports_to_conditions; +// input select needs to be added for (auto inpt : possible_ports) { + // domain of output port auto write_ops = domain(buf.access_map.at(outpt)); + // values written auto written = range(buf.access_map.at(inpt)); auto read = @@ -737,7 +744,7 @@ selector generate_select(CodegenOptions& options, std::ostream& out, const strin void generate_bundles(CodegenOptions& options, std::ostream& out, UBuffer& buf) { out << "// # of bundles = " << buf.port_bundles.size() << endl; - + //broadcast input to every lane for (auto b : buf.port_bundles) { //cout << "\t generate for bundle: " << b.first << endl; out << "// " << b.first << endl; @@ -1264,6 +1271,9 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { auto overlap = its(range(access_map.at(inpt)), range(access_map.at(outpt))); + cout << "access map input " << str(access_map.at(inpt)) << endl; + cout << "access map output " << str(access_map.at(outpt)) << endl; + if (!empty(overlap)) { stack_bank bank = compute_bank_info(inpt, outpt); add_bank_between(inpt, outpt, bank); @@ -1306,7 +1316,8 @@ cout<<"============================================"< sv_domain; + // string port name, piecewise + // compute operations -> addresses (multi D) std::map access_map; std::map schedule; std::map > port_bundles; - //post processed access map + //post processed access map - ignore std::map access_pattern; - map, stack_bank > stack_banks; + // input port -> output port bank + // input ports -> switching networks that set inputs to banks, array of + // banks , output ports + // generalize this mapping -> mixing + matching + // addressing scheme: bank index, addr in bank + // indices on domain of access map -> bank nums it's reading + // generalize data structure + // pair of ports + piece of access pattern + // 1 bank bw input + output for each piece + map, std::vector > stack_banks; map selectors; //lowering ubuffer to memtile @@ -873,8 +884,10 @@ class UBuffer { bank get_bank(const std::string& name) const { for (auto b : stack_banks) { - if (b.second.name == name) { - return b.second; + for (auto b_ : b.second) { + if (b_.name == name) { + return b_; + } } } cout << "Error: No such bank as: " << name << endl; @@ -884,8 +897,10 @@ class UBuffer { string get_bank_input(const std::string& name) const { for (auto b : stack_banks) { - if (b.second.name == name) { - return b.first.first; + for (auto b_ : b.second) { + if (b_.name == name) { + return b.first.first; + } } } cout << "Error: No such bank as: " << name << endl; @@ -896,8 +911,10 @@ class UBuffer { std::set get_bank_inputs(const std::string& name) const { std::set ret; for (auto b : stack_banks) { - if (b.second.name == name) { - ret.insert(b.first.first); + for (auto b_ : b.second) { + if (b_.name == name) { + ret.insert(b.first.first); + } } } return ret; @@ -906,8 +923,10 @@ class UBuffer { std::set get_bank_outputs(const std::string& name) const { std::set ret; for (auto b : stack_banks) { - if (b.second.name == name) { - ret.insert(b.first.second); + for (auto b_ : b.second) { + if (b_.name == name) { + ret.insert(b.first.second); + } } } return ret; @@ -915,19 +934,24 @@ class UBuffer { void replace_bank(stack_bank& target, stack_bank& replacement) { for (auto bnk : stack_banks) { - if (bnk.second.name == target.name) { - stack_banks[bnk.first] = replacement; - break; + for (int i = 0; i < bnk.second.size(); i++) { + auto b_ = bnk.second[i]; + if (b_.name == target.name) { + bnk.second[i] = replacement; + // stack_banks[bnk.first] = replacement; + break; + } } } } + // removes all banks at this output port void remove_bank(string pt_name) { - map, bank> replace; + map, std::vector> replace; for (auto bnk : stack_banks) { - if (bnk.first.second != pt_name) { - replace.insert(bnk); - } + if (bnk.first.second != pt_name) { + replace.insert(bnk); + } } stack_banks = replace; } @@ -943,33 +967,48 @@ class UBuffer { vector bnk; std::set done; for (auto bs : stack_banks) { - if (!elem(bs.second.name, done)) { - bnk.push_back(bs.second); - done.insert(bs.second.name); + auto ex = bs.second; +cout << "element name " << ex[0].name << endl; + if (!elem(ex[0].name, done)) { + bnk.push_back(ex[0]); + done.insert(ex[0].name); } + /*for (auto b_ : bs.second) { + if (!elem(b_.name, done)) { + bnk.push_back(b_); + done.insert(b_.name); + } + }*/ } return bnk; } void add_bank_between(const std::string& inpt, const std::string& outpt, stack_bank& bank) { - stack_banks[{inpt, outpt}] = bank; + stack_banks[{inpt, outpt}].push_back(bank); + //stack_banks[{inpt, outpt}] = bank; } + // returns true if at least one bank between given input and output ports bool has_bank_between(const std::string& inpt, const std::string& outpt) const { for (auto bs : stack_banks) { if (bs.first.first == inpt && bs.first.second == outpt) { return true; } + } return false; } + // returns name of ONE bank in between given input and output ports, even + // if there are multiple banks -- may want to consider extending this to + // returning ALL banks between given input and output port string bank_between(const std::string& inpt, const std::string& outpt) const { for (auto bs : stack_banks) { if (bs.first.first == inpt && bs.first.second == outpt) { - return bs.second.name; + auto first_bank = bs.second[0]; + return first_bank.name; } } @@ -978,23 +1017,52 @@ class UBuffer { return ""; } + std::vector banks_between(const std::string& inpt, const std::string& outpt) const { + std::vector bank_names; + for (auto bs : stack_banks) { + if (bs.first.first == inpt && bs.first.second == outpt) { + for (auto b_ : bs.second) { + bank_names.push_back(b_.name); + } + return bank_names; + } + } + + cout << "Error: No bank between: " << inpt << " and " << outpt << endl; + assert(false); + return {""}; + } + + bank get_bank_between(const std::string& inpt, const std::string& outpt) const { string bk_name = bank_between(inpt, outpt); return get_bank(bk_name); } + +/* std::vector get_banks_between(const std::string& inpt, const std::string& outpt) const { + std::vector bank_names = banks_between(inpt, outpt); + std::vector banks; + for (auto name : bank_names) { + banks.push_back(get_bank(name)); + } + return banks; + } + */ vector receiver_banks(const std::string& inpt) { vector bnks; vector done; for (auto bs : stack_banks) { - if (bs.first.first == inpt) { + for (auto b_ : bs.second) { + if (bs.first.first == inpt) { + + if (!elem(b_.name, done)) { + bnks.push_back(b_); + done.push_back(b_.name); + } - if (!elem(bs.second.name, done)) { - bnks.push_back(bs.second); - done.push_back(bs.second.name); + //assert(bnks.back().read_delays.size() == bs.second.read_delays.size()); } - - //assert(bnks.back().read_delays.size() == bs.second.read_delays.size()); } } return bnks; @@ -1047,7 +1115,8 @@ class UBuffer { for (auto outpt: get_out_ports()) { if (buf.has_bank_between(inpt, outpt)) { stack_banks[make_pair(inpt, outpt)] = - buf.get_bank_between(inpt, outpt); + {buf.get_bank_between(inpt, outpt)}; + //buf.get_banks_betweeen(inpt, outpt); } } } From 6996d997a67e0fd54d97460d8a8308e962e8bf7b Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Fri, 5 Jun 2020 19:47:12 -0700 Subject: [PATCH 08/33] debugging seg fault --- ubuffer.cpp | 4 ++-- ubuffer.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 13f86a328..aff242ca2 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1299,7 +1299,7 @@ cout<<"============================================"< Date: Fri, 5 Jun 2020 19:50:41 -0700 Subject: [PATCH 09/33] commenting out splitting banks stuff --- ubuffer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index aff242ca2..cd11ee73b 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1347,7 +1347,6 @@ cout<<"============================================"< 0) { cout << "mergeable size is greater than 0" << endl; From f5f140cdb0f754dfdcb8d6be7f1ce2b0dc2d8e72 Mon Sep 17 00:00:00 2001 From: Sophia Liu Date: Fri, 5 Jun 2020 21:35:51 -0700 Subject: [PATCH 10/33] cout rm --- ubuffer.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index cd11ee73b..880ab0661 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1226,7 +1226,7 @@ void UBuffer::merge_bank(CodegenOptions& options, string inpt, vector Date: Fri, 5 Jun 2020 22:27:08 -0700 Subject: [PATCH 11/33] no couts in ubuffer.h, refactoring add bank between --- ubuffer.cpp | 70 ++++++++++++++++++++++++++--------------------------- ubuffer.h | 22 +++++++++++------ 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 880ab0661..856b9db74 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -538,7 +538,7 @@ void generate_code_prefix(CodegenOptions& options, } else if (options.inner_bank_offset_mode == INNER_BANK_OFFSET_LINEAR) { string linear_addr = buf.generate_linearize_ram_addr(inpt); - cout <<"Input port:" << inpt << ", Get ram string: " << linear_addr << endl; + //cout <<"Input port:" << inpt << ", Get ram string: " << linear_addr << endl; out << tab(1) << buf.name << "." << sb.name << ".write(" << inpt << ", " << linear_addr << ");" << endl; } @@ -1169,14 +1169,14 @@ void UBuffer::merge_bank(CodegenOptions& options, string inpt, vector merged.maxdelay) { @@ -1271,13 +1271,13 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { auto overlap = its(range(access_map.at(inpt)), range(access_map.at(outpt))); - cout << "access map input " << str(access_map.at(inpt)) << endl; - cout << "access map output " << str(access_map.at(outpt)) << endl; + //cout << "access map input " << str(access_map.at(inpt)) << endl; + //cout << "access map output " << str(access_map.at(outpt)) << endl; if (!empty(overlap)) { stack_bank bank = compute_bank_info(inpt, outpt); add_bank_between(inpt, outpt, bank); -cout<<"outpt: " << outpt << endl; +//cout<<"outpt: " << outpt << endl; } } } @@ -1287,11 +1287,11 @@ cout<<"outpt: " << outpt << endl; for (auto inpt : get_in_ports()) { // try to turn the banks for this inpt into one big linebuffer vector receivers = receiver_banks(inpt); - cout << "Receiver banks for " << inpt << endl; +// cout << "Receiver banks for " << inpt << endl; vector mergeable; for (auto bnk : receivers) { -cout<<"============================================"< 0) { -cout << "mergeable size is greater than 0" << endl; -cout << "inpt "<(pt_vec.begin() + i, pt_vec.end())); - cout <<"Port: " << pt_vec.at(i) << " ,get merge map: " << str(out_map_merge) << endl; + //cout <<"Port: " << pt_vec.at(i) << " ,get merge map: " << str(out_map_merge) << endl; outpt_merge.insert(make_pair(pt_vec.at(i), out_map_merge)); if (i == 0) { back_edge.insert(make_pair(pt_vec.at(i), input)); @@ -1466,7 +1466,7 @@ void UBuffer::port_group2bank(int in_port_width, int out_port_width) { replace_pt(it.first, it.second); //auto new_sched = dot(schedule.at(it.first), to_umap(it.second)); auto new_sched = assign_domain_to_map(to_map(schedule.at(it.first)), ::domain(it.second)); - cout << "new schedule with lib: " << str(new_sched) << endl; + //cout << "new schedule with lib: " << str(new_sched) << endl; schedule.at(it.first) = to_umap(new_sched); //add valid bound, mark the main output } @@ -1489,7 +1489,7 @@ void UBuffer::port_group2bank(int in_port_width, int out_port_width) { stack_bank super_bk = compute_bank_info(inpt_set, outpt_set); for (auto inpt: inpt_set) { for (auto outpt: outpt_set) { - cout << "Merge port: " << outpt << endl; + //cout << "Merge port: " << outpt << endl; add_bank_between(inpt, outpt, super_bk); } } @@ -1511,7 +1511,7 @@ void UBuffer::port_group2bank(int in_port_width, int out_port_width) { for (auto it : outpt_merge) { replace_pt(it.first, it.second); auto new_sched = assign_domain_to_map(to_map(schedule.at(it.first)), ::domain(it.second)); - cout << "new schedule with lib: " << str(new_sched) << endl; + //cout << "new schedule with lib: " << str(new_sched) << endl; schedule.at(it.first) = to_umap(new_sched); } for (auto it: back_edge) { @@ -1532,7 +1532,7 @@ void UBuffer::port_group2bank(int in_port_width, int out_port_width) { stack_bank super_bk = compute_bank_info(inpt_set, outpt_set); for (auto inpt: inpt_set) { for (auto outpt: outpt_set) { - cout << "Merge port: " << outpt << endl; + //cout << "Merge port: " << outpt << endl; add_bank_between(inpt, outpt, super_bk); } } @@ -1656,7 +1656,7 @@ Box UBuffer::get_bundle_box(const std::string & pt) { break; } } - cout << pt_name << endl; + //cout << pt_name << endl; auto pt_map = to_map(access_map.at(pt_name)); auto pt_range = range(pt_map); Box ret; @@ -1840,7 +1840,7 @@ map UBuffer::produce_vectorized_schedule(string in_bd_name, st out_new_sched_vec.push_back("0"); } else { - cout << "ERROR: The schedule is not considered\n\tin vec: " << in_sched_vec << "\n\tout vec: " << out_sched_vec << endl; + //cout << "ERROR: The schedule is not considered\n\tin vec: " << in_sched_vec << "\n\tout vec: " << out_sched_vec << endl; assert(false); } } @@ -1936,24 +1936,24 @@ void UBuffer::vectorization(int dim_id, int fetch_width, UBuffer& agg_buf, UBuff //assert(false); for (auto bd_name : in_bundle) { - cout << "Vectorize input port bundle: " << bd_name << endl; + //cout << "Vectorize input port bundle: " << bd_name << endl; for (auto in_pt_name : port_bundles.at(bd_name) ) { - cout << "\tvectorize input port: " << in_pt_name << endl; + //cout << "\tvectorize input port: " << in_pt_name << endl; auto acc_pattern = access_pattern.at(in_pt_name); auto acc_pattern_vec = acc_pattern.vectorization(dim_id, fetch_width); - std::cout << "before rewrite: " << acc_pattern << endl; +// std::cout << "before rewrite: " << acc_pattern << endl; //produce the operation transfomation isl_map* op_trans = acc_pattern.get_op_transform(ctx, dim_id, fetch_width); - std::cout << "transform rewrite: " << str(op_trans) << endl; + // std::cout << "transform rewrite: " << str(op_trans) << endl; auto rewrite_buf2op = dot(inv(access_map.at(in_pt_name)), op_trans); auto new_op_domain = pick(get_sets(range(rewrite_buf2op))); - cout << "rewrite buffer to op map: " << str(access_map.at(in_pt_name)) << endl; + // cout << "rewrite buffer to op map: " << str(access_map.at(in_pt_name)) << endl; //add in port to agg_buf auto inpt_acc_map = remap_access_to_new_buffer(in_pt_name, "_agg"); - cout << "Access map add to agg_in: " << str(inpt_acc_map) << endl; + // cout << "Access map add to agg_in: " << str(inpt_acc_map) << endl; agg_buf.add_in_pt(in_pt_name+"_in", domain.at(in_pt_name), inpt_acc_map, its(new_sched.at(acc_pattern.op_name), domain.at(in_pt_name))); agg_buf.port_bundles[bd_name+"_agg_in"].push_back(in_pt_name + "_in"); @@ -1967,18 +1967,18 @@ void UBuffer::vectorization(int dim_id, int fetch_width, UBuffer& agg_buf, UBuff } for (auto bd_name: out_bundle) { - cout << "Vectorize output port bundle: " << bd_name << endl; + //cout << "Vectorize output port bundle: " << bd_name << endl; for (auto out_pt_name : port_bundles.at(bd_name) ) { - cout << "\tVectorize output port: " << out_pt_name << endl; + //cout << "\tVectorize output port: " << out_pt_name << endl; auto acc_pattern = access_pattern.at(out_pt_name); auto acc_pattern_vec = acc_pattern.vectorization(dim_id, fetch_width); - std::cout << "before rewrite: " << acc_pattern << endl; +// std::cout << "before rewrite: " << acc_pattern << endl; //produce the operation transfomation isl_map* op_trans = acc_pattern.get_op_transform(ctx, dim_id, fetch_width); - std::cout << "transform rewrite: " << str(op_trans) << endl; +// std::cout << "transform rewrite: " << str(op_trans) << endl; auto rewrite_buf2op = dot(inv(access_map.at(out_pt_name)), op_trans); auto new_op_domain = pick(get_sets(range(rewrite_buf2op))); @@ -1986,7 +1986,7 @@ void UBuffer::vectorization(int dim_id, int fetch_width, UBuffer& agg_buf, UBuff auto outpt_acc_map = acc_pattern.get_access_map_and_decouple_reuse(ctx, dim_id); outpt_acc_map = add_range_suffix(outpt_acc_map, "_tb"); - cout << "Access map decouple reuse: " << str(outpt_acc_map) << endl; + //cout << "Access map decouple reuse: " << str(outpt_acc_map) << endl; tb.add_out_pt(out_pt_name+"_out", domain.at(out_pt_name), outpt_acc_map, its(new_sched.at(acc_pattern.op_name), domain.at(out_pt_name))); tb.port_bundles[bd_name+"_tb_out"].push_back(out_pt_name + "_out"); @@ -1998,10 +1998,10 @@ void UBuffer::vectorization(int dim_id, int fetch_width, UBuffer& agg_buf, UBuff } } - cout << "AGG : " << agg_buf << endl; - cout << "SRAM: " << sram << endl; - cout << "TB : " << tb << endl; - cout << "AGG Schedule: " << str(agg_buf.global_schedule()) << endl; - cout << "SRAM Schedule: " << str(sram.global_schedule()) << endl; - cout << "TB Schedule: " << str(tb.global_schedule()) << endl; + //cout << "AGG : " << agg_buf << endl; + //cout << "SRAM: " << sram << endl; + //cout << "TB : " << tb << endl; + //cout << "AGG Schedule: " << str(agg_buf.global_schedule()) << endl; + //cout << "SRAM Schedule: " << str(sram.global_schedule()) << endl; + //cout << "TB Schedule: " << str(tb.global_schedule()) << endl; } diff --git a/ubuffer.h b/ubuffer.h index 47f3cb707..d6d709b38 100644 --- a/ubuffer.h +++ b/ubuffer.h @@ -948,7 +948,7 @@ class UBuffer { // removes all banks at this output port void remove_bank(string pt_name) { - map, std::vector> replace; + map, std::vector> replace; for (auto bnk : stack_banks) { if (bnk.first.second != pt_name) { replace.insert(bnk); @@ -985,8 +985,17 @@ cout << "element name " << ex[0].name << endl; } void add_bank_between(const std::string& inpt, const std::string& outpt, stack_bank& bank) { - stack_banks[{inpt, outpt}].push_back(bank); - //stack_banks[{inpt, outpt}] = bank; + + if (has_bank_between(inpt, outpt)) { + std::vector b_ = stack_banks[{inpt, outpt}]; + b_.push_back(bank); + stack_banks[{inpt, outpt}] = b_; + //stack_banks[{inpt, outpt}] = bank; + } else { + std::vector b_; + b_.push_back(bank); + stack_banks[{inpt, outpt}] = b_; + } } // returns true if at least one bank between given input and output ports @@ -1041,7 +1050,7 @@ cout << "first bank name " << first_bank.name << endl; return get_bank(bk_name); } -/* std::vector get_banks_between(const std::string& inpt, const std::string& outpt) const { + std::vector get_banks_between(const std::string& inpt, const std::string& outpt) const { std::vector bank_names = banks_between(inpt, outpt); std::vector banks; for (auto name : bank_names) { @@ -1049,7 +1058,6 @@ cout << "first bank name " << first_bank.name << endl; } return banks; } - */ vector receiver_banks(const std::string& inpt) { vector bnks; @@ -1117,8 +1125,8 @@ cout << "first bank name " << first_bank.name << endl; for (auto outpt: get_out_ports()) { if (buf.has_bank_between(inpt, outpt)) { stack_banks[make_pair(inpt, outpt)] = - {buf.get_bank_between(inpt, outpt)}; - //buf.get_banks_betweeen(inpt, outpt); + //{buf.get_bank_between(inpt, outpt)}; + buf.get_banks_between(inpt, outpt); } } } From ff949115e0d3abe778bf0855cb00509e071e0f54 Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Fri, 5 Jun 2020 22:53:01 -0700 Subject: [PATCH 12/33] splitting banks output looks fine, but is og bank being removed? --- prog.cpp | 1 + ubuffer.cpp | 49 ++++++++----------------------------------------- ubuffer.h | 17 ++++++++++++----- utils.h | 1 - 4 files changed, 21 insertions(+), 47 deletions(-) diff --git a/prog.cpp b/prog.cpp index ad0baff09..92d553c73 100644 --- a/prog.cpp +++ b/prog.cpp @@ -1499,6 +1499,7 @@ module_type* generate_rtl_buffer(CodegenOptions& options, UBuffer& buffer) { minihls::block* blk = minigen.add_block(buffer.name); + cout << "&&&&&&&&&&&&&&&& prog.cpp" << endl; for (auto bank_struct : buffer.get_banks()) { auto bankprog = minigen.add_block(bank_struct.name); diff --git a/ubuffer.cpp b/ubuffer.cpp index 856b9db74..719d034f7 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -391,7 +391,7 @@ void UBuffer::generate_coreir(CodegenOptions& options, CoreIR::ModuleDef* def) { //map save the register map wire2out; map reg_in; - + cout << "&&&&&&&&&&&&&&&&&&&&& 394" << endl; for (auto bk : get_banks()) { std::set inpts = get_bank_inputs(bk.name); std::set outpts = get_bank_outputs(bk.name); @@ -501,12 +501,13 @@ void generate_code_prefix(CodegenOptions& options, //string inpt = buf.get_in_port(); out << "#include \"hw_classes.h\"" << endl << endl; +cout << "&&&&&&&&&&&&&&&&&&&&&&&&& 504 " << endl; for (auto b : buf.get_banks()) { generate_bank(options, out, b); } out << "struct " << buf.name << "_cache {" << endl; - +cout << "&&&&&&&&&&&&&&&&&&&&&&&& 510 " << endl; for (auto b : buf.get_banks()) { out << tab(1) << b.name << "_cache " @@ -1287,19 +1288,10 @@ void UBuffer::generate_bank_and_merge(CodegenOptions& options) { for (auto inpt : get_in_ports()) { // try to turn the banks for this inpt into one big linebuffer vector receivers = receiver_banks(inpt); -// cout << "Receiver banks for " << inpt << endl; vector mergeable; for (auto bnk : receivers) { -//cout<<"============================================"< 0) { -//cout << "mergeable size is greater than 0" << endl; -//cout << "inpt "< 0) { + merge_bank(options, inpt, mergeable); + } } } } diff --git a/ubuffer.h b/ubuffer.h index d6d709b38..05140c0e0 100644 --- a/ubuffer.h +++ b/ubuffer.h @@ -965,23 +965,30 @@ class UBuffer { } vector get_banks() { + cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << endl; vector bnk; - std::set done; + for (auto b : stack_banks) { + for (auto b_ : b.second) { + cout << "-------------------------------------------------------------------------------------------------------- " << b_.name << endl; + bnk.push_back(b_); + } + } + return bnk; + /*std::set done; for (auto bs : stack_banks) { auto ex = bs.second; -cout << "element name " << ex[0].name << endl; if (!elem(ex[0].name, done)) { bnk.push_back(ex[0]); done.insert(ex[0].name); - } + }*/ /*for (auto b_ : bs.second) { if (!elem(b_.name, done)) { bnk.push_back(b_); done.insert(b_.name); } - }*/ + } } - return bnk; + return bnk;*/ } void add_bank_between(const std::string& inpt, const std::string& outpt, stack_bank& bank) { diff --git a/utils.h b/utils.h index 216bc1ae8..bc7a9df2d 100644 --- a/utils.h +++ b/utils.h @@ -84,7 +84,6 @@ bool is_number(string s) { static inline int safe_stoi(const string s) { - cout << "safe stoi " << s << endl; if (is_number(s)) { return stoi(s); } From d7d42073da676648bda7053d736815337782f7de Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 6 Jun 2020 15:55:21 -0700 Subject: [PATCH 13/33] seg fault donegit add -u --- ubuffer.cpp | 133 +++++++++++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index d908597db..8dc8afd63 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1298,73 +1298,76 @@ cout << "&&&&&&&&&&&&&&&&&&&&&&&& 510 " << endl; } } - int counter = 0; - - for (auto inpt : get_in_ports()) { - // try to turn the banks for this inpt into one big linebuffer - vector receivers = receiver_banks(inpt); - vector mergeable; - - for (auto bnk : receivers) { - if (bnk.read_delays.size() != 2) { - cout << "splitting banks " << endl; - // splitting banks - stack_bank bank1, bank2; - bank1.tp = BANK_TYPE_STACK; - bank1.rddom = bnk.rddom; - bank1.name = inpt + "_split_banks1_" + to_string(counter); - bank1.pt_type_string = bnk.pt_type_string; - bank1.num_readers = mergeable.size(); - bank1.maxdelay = bnk.maxdelay; - - bank2.tp = BANK_TYPE_STACK; - bank2.rddom = bnk.rddom; - bank2.name = inpt + "_split_banks2_" + to_string(counter); - bank2.pt_type_string = bnk.pt_type_string; - bank2.num_readers = mergeable.size(); - bank2.maxdelay = bnk.maxdelay; - // read delays are offsets are within banks - // look at different pieces of access pattern - bank1.read_delays.push_back(bnk.read_delays[0]); - bank1.read_delays.push_back(bnk.read_delays[bnk.read_delays.size() - 1]); - for (int i = 0; i < bnk.read_delays.size() - 1; i++) { - bank2.read_delays.push_back(bnk.read_delays[i]); - } - counter++; - auto outpt_vect = bnk.get_out_ports(); - auto outpt = outpt_vect[0]; - for (auto i : outpt_vect) {cout << " out port: " << i << endl;} - add_bank_between(inpt, outpt, bank2); - add_bank_between(inpt, outpt, bank1); - remove_bank(bnk.name); - mergeable.push_back(bank1); - } else { - if (options.debug_options.expect_all_linebuffers) { - //assert(receivers.size() == 1 || bnk.read_delays.size() == 2); - assert(bnk.read_delays.size() == 2); - } - if (bnk.read_delays.size() == 2) { - assert(bnk.read_delays[0] == 0); - mergeable.push_back(bnk); + int counter = 0; + + for (auto inpt : get_in_ports()) { + // try to turn the banks for this inpt into one big linebuffer + vector receivers = receiver_banks(inpt); + vector mergeable; + + for (auto bnk : receivers) { + if (bnk.read_delays.size() != 2) { + cout << "splitting banks " << endl; + // splitting banks + stack_bank bank1, bank2; + bank1.tp = BANK_TYPE_STACK; + bank1.rddom = bnk.rddom; + bank1.name = inpt + "_split_banks1_" + to_string(counter); + bank1.pt_type_string = bnk.pt_type_string; + bank1.num_readers = mergeable.size(); + bank1.maxdelay = bnk.maxdelay; + + bank2.tp = BANK_TYPE_STACK; + bank2.rddom = bnk.rddom; + bank2.name = inpt + "_split_banks2_" + to_string(counter); + bank2.pt_type_string = bnk.pt_type_string; + bank2.num_readers = mergeable.size(); + bank2.maxdelay = bnk.maxdelay; + // read delays are offsets are within banks + // look at different pieces of access pattern + bank1.read_delays.push_back(bnk.read_delays[0]); + bank1.read_delays.push_back(bnk.read_delays[bnk.read_delays.size() - 1]); + for (int i = 0; i < bnk.read_delays.size() - 1; i++) { + bank2.read_delays.push_back(bnk.read_delays[i]); + } + counter++; + auto outpt_vect = bnk.get_out_ports(); + auto outpt = outpt_vect[0]; + for (auto i : outpt_vect) {cout << " out port: " << i << endl;} + + add_bank_between(inpt, outpt, bank2); + add_bank_between(inpt, outpt, bank1); + remove_bank(bnk.name); + mergeable.push_back(bank1); + + } else { + if (options.debug_options.expect_all_linebuffers) { + //assert(receivers.size() == 1 || bnk.read_delays.size() == 2); + assert(bnk.read_delays.size() == 2); + } + if (bnk.read_delays.size() == 2) { + assert(bnk.read_delays[0] == 0); + mergeable.push_back(bnk); + } } - } - if (mergeable.size() > 0) { - merge_bank(options, inpt, mergeable); - auto banks = get_banks(); - //cout << "finished create bank!" << endl; - //for (bank bk : banks) { - //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; - //cout << tab(1); - //for (int dl: bk.read_delays) { - //cout << dl << "," ; - //} - //cout << endl; - //for (auto dl: bk.delay_map) { - //cout < 0) { + merge_bank(options, inpt, mergeable); + auto banks = get_banks(); + //cout << "finished create bank!" << endl; + //for (bank bk : banks) { + //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; + //cout << tab(1); + //for (int dl: bk.read_delays) { + //cout << dl << "," ; + //} + //cout << endl; + //for (auto dl: bk.delay_map) { + //cout < Date: Sat, 6 Jun 2020 17:06:52 -0700 Subject: [PATCH 14/33] replace --- ubuffer.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ubuffer.h b/ubuffer.h index 86f8a6f2f..e47ce2276 100644 --- a/ubuffer.h +++ b/ubuffer.h @@ -979,12 +979,17 @@ class UBuffer { void replace_bank(stack_bank& target, stack_bank& replacement) { for (auto bnk : stack_banks) { + cout<<"-----------------------------bnk.second.size"<, std::vector> replace; for (auto bnk : stack_banks) { if (bnk.first.second != pt_name) { From a0325a051cafa0fd65ccf564f2fc78e0624b8454 Mon Sep 17 00:00:00 2001 From: Sophia Liu Date: Sat, 6 Jun 2020 17:41:14 -0700 Subject: [PATCH 15/33] access map printout --- ubuffer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 8dc8afd63..e6bf001cb 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1290,7 +1290,7 @@ cout << "&&&&&&&&&&&&&&&&&&&&&&&& 510 " << endl; for (auto outpt : get_out_ports()) { auto overlap = its(range(access_map.at(inpt)), range(access_map.at(outpt))); - +cout<<"access map in "< Date: Sat, 6 Jun 2020 21:40:09 -0700 Subject: [PATCH 16/33] rddom test --- ubuffer.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index e6bf001cb..472d793af 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1171,7 +1171,16 @@ cout << "&&&&&&&&&&&&&&&&&&&&&&&& 510 " << endl; auto rddom = unn(range(access_map.at(inpt)), range(access_map.at(outpt))); - //cout << "Read domain for bank: " << str(rddom) << endl; + cout << "Read domain for bank: " << str(rddom) << endl; +cout<<"access map "< Date: Sat, 6 Jun 2020 21:52:12 -0700 Subject: [PATCH 17/33] debugging --- ubuffer.cpp | 4 ++-- ubuffer.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 472d793af..d246f527e 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1177,10 +1177,10 @@ isl_union_map* test =access_map.at(outpt); auto maptest = to_map(test); cout<<"access map output "<< domain_name(maptest)< Date: Sat, 6 Jun 2020 22:02:35 -0700 Subject: [PATCH 18/33] comment --- ubuffer.cpp | 2 -- ubuffer.h | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index d246f527e..c3ced3959 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -523,13 +523,11 @@ void generate_code_prefix(CodegenOptions& options, //string inpt = buf.get_in_port(); out << "#include \"hw_classes.h\"" << endl << endl; -cout << "&&&&&&&&&&&&&&&&&&&&&&&&& 504 " << endl; for (auto b : buf.get_banks()) { generate_bank(options, out, b); } out << "struct " << buf.name << "_cache {" << endl; -cout << "&&&&&&&&&&&&&&&&&&&&&&&& 510 " << endl; for (auto b : buf.get_banks()) { out << tab(1) << b.name << "_cache " diff --git a/ubuffer.h b/ubuffer.h index 1c688c3bc..82b786a45 100644 --- a/ubuffer.h +++ b/ubuffer.h @@ -1016,11 +1016,10 @@ cout<<"remove bank --------------------------------------------------------"< get_banks() { - cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << endl; vector bnk; for (auto b : stack_banks) { for (auto b_ : b.second) { - cout << "-------------------------------------------------------------------------------------------------------- " << b_.name << endl; + cout << "bank name " << b_.name << endl; bnk.push_back(b_); } } From 8eb50c3fcbd6e89db6fbbaf8d6e662182ae22850 Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 6 Jun 2020 22:07:11 -0700 Subject: [PATCH 19/33] comment --- ubuffer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ubuffer.cpp b/ubuffer.cpp index c3ced3959..f2cb7f951 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1216,6 +1216,7 @@ cout<<"access map output "<< range_name(maptest)< Date: Sat, 6 Jun 2020 22:10:33 -0700 Subject: [PATCH 20/33] comment --- ubuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index f2cb7f951..8d6f9a2da 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1191,6 +1191,7 @@ cout<<"access map output "<< range_name(maptest)< mergeable) { + cout << "merge bank called " << endl; if (!options.conditional_merge){ stack_bank merged; merged.tp = BANK_TYPE_STACK; @@ -1214,7 +1215,7 @@ cout<<"access map output "<< range_name(maptest)< Date: Sat, 6 Jun 2020 22:20:32 -0700 Subject: [PATCH 21/33] more prints --- ubuffer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 8d6f9a2da..4ad6afb47 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -519,6 +519,7 @@ void generate_code_prefix(CodegenOptions& options, UBuffer& buf) { //banking and merge pass + cout << "before generate bank and merge " << endl; buf.generate_bank_and_merge(options); //string inpt = buf.get_in_port(); @@ -855,6 +856,7 @@ void generate_code_prefix(CodegenOptions& options, } void generate_hls_code(CodegenOptions& options, std::ostream& out, UBuffer& buf) { +cout << "generate hls code " << endl; generate_code_prefix(options, out, buf); for (auto outpt : buf.get_out_ports()) { @@ -1227,9 +1229,6 @@ cout << "mergeable size " << mergeable.size() << endl; sort(mergeable.begin(), mergeable.end(), [](const bank& l, const bank& r) { return l.maxdelay > r.maxdelay; }); - for (auto merge_bank : mergeable) { - //cout << merge_bank.name << " with delay : " << merge_bank.maxdelay << endl; - } while(mergeable.size()) { //keep pop port to merged bank and replace origin bank @@ -1309,12 +1308,12 @@ cout<<"access map in "< receivers = receiver_banks(inpt); vector mergeable; - +cout << "num receivers " << receivers.size() << endl; for (auto bnk : receivers) { if (bnk.read_delays.size() != 2) { cout << "splitting banks " << endl; @@ -1367,6 +1366,7 @@ add_bank_between(inpt, outpt, bank2); } if (mergeable.size() > 0) { +cout << "before merge bank call" << endl; merge_bank(options, inpt, mergeable); auto banks = get_banks(); //cout << "finished create bank!" << endl; From d17458c34387faa8513a9d65d8a3a248ca654c1c Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 6 Jun 2020 22:28:12 -0700 Subject: [PATCH 22/33] playing with for loop --- ubuffer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 4ad6afb47..42a50c923 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1349,7 +1349,7 @@ for(auto b : bnk.delay_map){ cout<< b.first<<" "< 0) { cout << "before merge bank call" << endl; merge_bank(options, inpt, mergeable); @@ -1383,7 +1383,7 @@ cout << "before merge bank call" << endl; //} } - } + // } } } From 0b65b0e874716afde9b9838f8504b94baf7e78aa Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 6 Jun 2020 22:35:28 -0700 Subject: [PATCH 23/33] fixed multiple diff merging bugs - same merge bank is repeated now --- ubuffer.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 42a50c923..e69542a1d 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1344,10 +1344,10 @@ cout << "num receivers " << receivers.size() << endl; auto outpt = outpt_vect[0]; for (auto i : outpt_vect) {cout << " out port: " << i << endl;} cout<<"output access map "< 0) { cout << "before merge bank call" << endl; merge_bank(options, inpt, mergeable); auto banks = get_banks(); + } //cout << "finished create bank!" << endl; //for (bank bk : banks) { //cout << bk.name << " has delays: ";//<< bk.read_delays << endl; @@ -1382,7 +1384,6 @@ cout << "before merge bank call" << endl; //} //} - } // } } } From 2fb432049b082e81594451fc7e6878135be27b6c Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 6 Jun 2020 22:50:47 -0700 Subject: [PATCH 24/33] delete print --- ubuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index e69542a1d..999eb9ecf 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -524,6 +524,7 @@ void generate_code_prefix(CodegenOptions& options, //string inpt = buf.get_in_port(); out << "#include \"hw_classes.h\"" << endl << endl; + cout << "before get banks " << endl; for (auto b : buf.get_banks()) { generate_bank(options, out, b); } @@ -1369,7 +1370,7 @@ cout << "num receivers " << receivers.size() << endl; if (mergeable.size() > 0) { cout << "before merge bank call" << endl; merge_bank(options, inpt, mergeable); - auto banks = get_banks(); +// auto banks = get_banks(); } //cout << "finished create bank!" << endl; //for (bank bk : banks) { From 8bac77e268a358d8a4aaa043c55822c61ec0bbdd Mon Sep 17 00:00:00 2001 From: Sophia Liu Date: Sat, 6 Jun 2020 23:08:43 -0700 Subject: [PATCH 25/33] get bank fix --- ubuffer.h | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/ubuffer.h b/ubuffer.h index 82b786a45..02a84286e 100644 --- a/ubuffer.h +++ b/ubuffer.h @@ -978,19 +978,14 @@ class UBuffer { } void replace_bank(stack_bank& target, stack_bank& replacement) { - cout << "target name " << target.name << " replace name " << replacement.name << endl; + //cout << "target name " << target.name << " replace name " << replacement.name << endl; for (auto bnk : stack_banks) { - cout<<"-----------------------------bnk.second.size"<, std::vector> replace; for (auto bnk : stack_banks) { if (bnk.first.second != pt_name) { @@ -1017,28 +1011,17 @@ cout<<"remove bank --------------------------------------------------------"< get_banks() { vector bnk; + std::set done; for (auto b : stack_banks) { for (auto b_ : b.second) { cout << "bank name " << b_.name << endl; - bnk.push_back(b_); - } - } - return bnk; - /*std::set done; - for (auto bs : stack_banks) { - auto ex = bs.second; - if (!elem(ex[0].name, done)) { - bnk.push_back(ex[0]); - done.insert(ex[0].name); - }*/ - /*for (auto b_ : bs.second) { - if (!elem(b_.name, done)) { - bnk.push_back(b_); + if (!elem(b_.name, done)){ + bnk.push_back(b_); done.insert(b_.name); } } } - return bnk;*/ + return bnk; } void add_bank_between(const std::string& inpt, const std::string& outpt, stack_bank& bank) { From 4a8d0e01041a964bab6d6158f6d9c1c104f23309 Mon Sep 17 00:00:00 2001 From: Sophia Liu Date: Sat, 6 Jun 2020 23:16:57 -0700 Subject: [PATCH 26/33] bank remove --- ubuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 999eb9ecf..f0f9f7dd5 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1350,9 +1350,9 @@ cout << "num receivers " << receivers.size() << endl; cout<< b.first<<" "< Date: Sun, 7 Jun 2020 15:25:19 -0700 Subject: [PATCH 27/33] print basic map test --- ubuffer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ubuffer.cpp b/ubuffer.cpp index f0f9f7dd5..5af6591e7 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1174,6 +1174,17 @@ cout << "generate hls code " << endl; range(access_map.at(outpt))); cout << "Read domain for bank: " << str(rddom) << endl; cout<<"access map "< Date: Sun, 7 Jun 2020 21:33:57 -0700 Subject: [PATCH 28/33] using access map to split banks instead of hardcoding --- ubuffer.cpp | 84 +++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 5af6591e7..4a7f292c6 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -1141,13 +1141,13 @@ cout << "generate hls code " << endl; int num_readers = 0; auto in_actions = domain.at(inpt); - //cout << "\t in action : " << str(in_actions) << endl; + cout << "\t in action : " << str(in_actions) << endl; auto lex_max_events = get_lexmax_events(outpt); - //cout << "\t lexmax result: " << str(lex_max_events) << endl; + cout << "\t lexmax result: " << str(lex_max_events) << endl; auto act_dom = ::domain(its_range(lex_max_events, to_uset(in_actions))); - //cout <<"\t act dom: " << str(act_dom) << endl; + cout <<"\t act dom: " << str(act_dom) << endl; if (!isl_union_set_is_empty(act_dom)) { num_readers++; @@ -1155,7 +1155,7 @@ cout << "generate hls code " << endl; int qpd = compute_dd_bound(outpt, inpt, true); int lb = compute_dd_bound(outpt, inpt, false); - //cout << "ub: " << qpd << ", lb: " << lb << endl; + cout << "ub: " << qpd << ", lb: " << lb << endl; for (int i = lb; i < qpd + 1; i++) { read_delays.push_back(i); @@ -1165,9 +1165,9 @@ cout << "generate hls code " << endl; string pt_type_string = port_type_string(); string name = inpt + "_to_" + outpt; - //cout << "inpt = " << inpt << endl; - //cout << "outpt = " << outpt << endl; - //cout << "name of bank = " << name << endl; + cout << "inpt = " << inpt << endl; + cout << "outpt = " << outpt << endl; + cout << "name of bank = " << name << endl; auto rddom = unn(range(access_map.at(inpt)), @@ -1183,8 +1183,6 @@ for(auto m : get_maps(access_map.at(outpt))){ } - -assert(false); isl_union_map* test =access_map.at(outpt); auto maptest = to_map(test); cout<<"access map output "<< domain_name(maptest)< receivers = receiver_banks(inpt); vector mergeable; -cout << "num receivers " << receivers.size() << endl; + cout << "num receivers " << receivers.size() << endl; for (auto bnk : receivers) { + if (bnk.read_delays.size() != 2) { - cout << "splitting banks " << endl; - // splitting banks - stack_bank bank1, bank2; - bank1.tp = BANK_TYPE_STACK; - bank1.rddom = bnk.rddom; - bank1.name = inpt + "_split_banks1_" + to_string(counter); - bank1.pt_type_string = bnk.pt_type_string; - bank1.num_readers = mergeable.size(); - bank1.maxdelay = bnk.maxdelay; - - bank2.tp = BANK_TYPE_STACK; - bank2.rddom = bnk.rddom; - bank2.name = inpt + "_split_banks2_" + to_string(counter); - bank2.pt_type_string = bnk.pt_type_string; - bank2.num_readers = mergeable.size(); - bank2.maxdelay = bnk.maxdelay; - // read delays are offsets are within banks - // look at different pieces of access pattern - bank1.read_delays.push_back(bnk.read_delays[0]); - bank1.read_delays.push_back(bnk.read_delays[bnk.read_delays.size() - 1]); - for (int i = 0; i < bnk.read_delays.size() - 1; i++) { - bank2.read_delays.push_back(bnk.read_delays[i]); - } - counter++; auto outpt_vect = bnk.get_out_ports(); auto outpt = outpt_vect[0]; - for (auto i : outpt_vect) {cout << " out port: " << i << endl;} - cout<<"output access map "< split_banks; + for (auto m : get_maps(access_map.at(outpt))) { + for (auto m_ : get_basic_maps(m)) { + string new_output = outpt + "_" + to_string(counter); + access_map.insert(std::pair(new_output, to_umap(to_map(m_)))); + schedule.insert(std::pair(new_output, schedule.at(outpt))); + //cout << "ACCESS MAP INSERT " << endl; + + stack_bank b_ = compute_bank_info(inpt, new_output); + add_bank_between(inpt, outpt, b_); + if (b_.read_delays.size() == 2) { + mergeable.push_back(b_); + } + access_map.erase(new_output); + schedule.erase(new_output); + //for (int i = 0; i < b_.read_delays.size(); i++) { + // cout << "counter: " << counter << " " << " NEW BANK READ DELAYS: " << b_.read_delays[i] << endl; + //} + //cout << "ACCESS MAP: " << str(m_) << endl; + counter++; + } + } remove_bank(outpt); - add_bank_between(inpt, outpt, bank2); - add_bank_between(inpt, outpt, bank1); - mergeable.push_back(bank1); } else { if (options.debug_options.expect_all_linebuffers) { From b8ad137876bb8d290e5029ee9d2c57b2d540821a Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Mon, 8 Jun 2020 15:51:35 -0700 Subject: [PATCH 29/33] move remove bank earlier so all new banks created at an output port are not deleted --- ubuffer.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 4a7f292c6..5a9e8659c 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -231,8 +231,10 @@ void generate_bank(CodegenOptions& options, if (num_readers == 1 || options.all_rams) { int partition_capacity = 1 + maxdelay; out << "\tfifo<" << pt_type_string << ", " << partition_capacity << "> f" << ";" << endl; + cout << "peek1" << endl; out << "\tinline " + pt_type_string + " peek(const int offset) {" << endl; ignore_inter_deps(out, "f"); + cout << "peek2" << endl; out << tab(2) << "return f.peek(" << partition_capacity - 1 << " - offset);" << endl; out << tab(1) << "}" << endl << endl; @@ -294,6 +296,7 @@ void generate_bank(CodegenOptions& options, //int capacity = capacities.at(nind); int capacity = p.second; assert(dv >= 0); + cout << "peek3" << endl; out << "\tinline " << pt_type_string << " peek_" << to_string(dv) << "() {" << endl; if (capacity > 1) { ignore_inter_deps(out, p.first); @@ -670,26 +673,33 @@ void generate_code_prefix(CodegenOptions& options, } else if (options.inner_bank_offset_mode == INNER_BANK_OFFSET_STACK) { if (options.all_rams || buf.get_bank(bank).num_readers == 1) { +cout << "peek4" << endl; value_str = bank + ".peek(/* one reader or all rams */ " + delay_expr + ")"; } else if (opt_const) { if (!options.all_rams && is_number(dx)) { assert(safe_stoi(dx) >= 0); +cout << "peek5" << endl; value_str = bank + ".peek_" + dx + "()"; } else { +cout << "peek6" << endl; value_str = bank + ".peek" + "( /* is opt const */ " + delay_expr + ")"; } } else if (pieces.size() == 0 && !options.all_rams) { +cout << "peek7" << endl; value_str = bank + ".peek_0()"; } else if (pieces.size() == 1 && isl_set_is_subset(cpy(out_domain), cpy(pieces[0].first))) { string dx = codegen_c(pieces[0].second); if (!options.all_rams && is_number(dx)) { assert(safe_stoi(dx) >= 0); +cout << "peek8" << endl; value_str = bank + ".peek_" + dx + "()"; } else { +cout << "peek9" << endl; value_str = bank + ".peek" + "(/* is one piece but not a number */" + dx + ")"; } } else { +cout << "peek10" << endl; value_str = bank + ".peek" + "(/* Needs general delay string */ " + delay_expr + ")"; } } @@ -1337,6 +1347,8 @@ cout<<"access map in "< split_banks; for (auto m : get_maps(access_map.at(outpt))) { for (auto m_ : get_basic_maps(m)) { @@ -1344,14 +1356,14 @@ cout<<"access map in "<(new_output, to_umap(to_map(m_)))); schedule.insert(std::pair(new_output, schedule.at(outpt))); //cout << "ACCESS MAP INSERT " << endl; - + stack_bank b_ = compute_bank_info(inpt, new_output); add_bank_between(inpt, outpt, b_); if (b_.read_delays.size() == 2) { mergeable.push_back(b_); } - access_map.erase(new_output); - schedule.erase(new_output); +// access_map.erase(new_output); +// schedule.erase(new_output); //for (int i = 0; i < b_.read_delays.size(); i++) { // cout << "counter: " << counter << " " << " NEW BANK READ DELAYS: " << b_.read_delays[i] << endl; //} @@ -1360,8 +1372,6 @@ cout<<"access map in "< Date: Mon, 8 Jun 2020 16:53:15 -0700 Subject: [PATCH 30/33] prints --- ubuffer.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index 5a9e8659c..bace445e0 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -227,7 +227,8 @@ void generate_bank(CodegenOptions& options, out << "\t// # of read delays: " << read_delays.size() << endl; read_delays = sort_unique(read_delays); - + cout << "PEEK num readers " << num_readers << endl; + cout << "PEEK options.all_rams " << options.all_rams << endl; if (num_readers == 1 || options.all_rams) { int partition_capacity = 1 + maxdelay; out << "\tfifo<" << pt_type_string << ", " << partition_capacity << "> f" << ";" << endl; @@ -529,6 +530,8 @@ void generate_code_prefix(CodegenOptions& options, out << "#include \"hw_classes.h\"" << endl << endl; cout << "before get banks " << endl; for (auto b : buf.get_banks()) { +cout << "BANK NAME " << b.name << endl; +cout<< "BANK MERGED READERS " << b.num_readers << endl; generate_bank(options, out, b); } @@ -672,6 +675,8 @@ void generate_code_prefix(CodegenOptions& options, value_str = bank + ".read(/*ram type address*/ "+ linear_addr + ")"; } else if (options.inner_bank_offset_mode == INNER_BANK_OFFSET_STACK) { + std::cout << "PEEK 4 options all rams " << options.all_rams << endl; + std::cout << "PEEK 4 num readers " << buf.get_bank(bank).num_readers << endl; if (options.all_rams || buf.get_bank(bank).num_readers == 1) { cout << "peek4" << endl; value_str = bank + ".peek(/* one reader or all rams */ " + delay_expr + ")"; @@ -1095,7 +1100,6 @@ cout << "generate hls code " << endl; } //cout << "compute max delay for super bank = " << maxdelay << endl; vector read_delays{0}; - int num_readers = outpt_set.size(); //int num_writers = inpt_set.size(); @@ -1159,6 +1163,8 @@ cout << "generate hls code " << endl; cout <<"\t act dom: " << str(act_dom) << endl; + cout << "COMPUTE BANK INFO " << !isl_union_set_is_empty(act_dom) << endl; + if (!isl_union_set_is_empty(act_dom)) { num_readers++; //auto c = compute_dd(buf, outpt, inpt); @@ -1224,6 +1230,7 @@ cout<<"access map output "<< range_name(maptest)< Date: Mon, 8 Jun 2020 17:34:43 -0700 Subject: [PATCH 31/33] added prints --- ubuffer.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ubuffer.cpp b/ubuffer.cpp index bace445e0..ddfe2fe33 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -289,7 +289,7 @@ void generate_bank(CodegenOptions& options, //} //assert(capacities.size() == partitions.size()); - +cout<<"num partitions "< Date: Thu, 11 Jun 2020 19:52:02 -0700 Subject: [PATCH 32/33] conv 2d logs --- conv_2d_bc.cpp | 790 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 790 insertions(+) create mode 100644 conv_2d_bc.cpp diff --git a/conv_2d_bc.cpp b/conv_2d_bc.cpp new file mode 100644 index 000000000..1ccc864c8 --- /dev/null +++ b/conv_2d_bc.cpp @@ -0,0 +1,790 @@ +#ifndef __VIVADO_SYNTH__ +#include +using namespace std; + + // Debug utility + ofstream* global_debug_handle; + +#endif //__VIVADO_SYNTH__ +#include "conv_3x3.h" + +#include "hw_classes.h" + +struct I_write_0_merged_banks_19_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 66 + // # of read delays: 4 + hw_uint<32> f0; + hw_uint<32> f2; + fifo , 62> f3; + hw_uint<32> f4; + hw_uint<32> f6; + + + inline hw_uint<32> peek_0() { + return f0; + } + + inline hw_uint<32> peek_1() { + return f2; + } + + inline hw_uint<32> peek_63() { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f3.back(); + } + + inline hw_uint<32> peek_64() { + return f4; + } + + inline hw_uint<32> peek_65() { + return f6; + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 1 + f6 = f4; +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 62 + f4 = f3.back(); +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 62 reading from capacity: 1 + f3.push(f2); +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // cap: 1 reading from capacity: 1 + f2 = f0; + // cap: 1 + f0 = value; + } + +}; + +struct I_write_0_to_I_read_0_10_1_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_11_5_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_8_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 131 + // # of read delays: 67 + fifo , 131> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(130 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_9_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_3_10_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_4_12_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_4_13_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 67 + // # of read delays: 3 + fifo , 67> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(66 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_5_16_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 64 + // # of read delays: 64 + fifo , 64> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(63 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_5_17_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 3 + // # of read delays: 3 + fifo , 3> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(2 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_20_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_21_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 130 + // # of read delays: 66 + fifo , 130> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(129 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_6_23_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_7_25_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_8_29_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 63 + // # of read delays: 63 + fifo , 63> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(62 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_32_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 66 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_33_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 129 + // # of read delays: 65 + fifo , 129> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(128 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_write_0_to_I_read_0_9_35_cache { + // RAM Box: {[0, 63], [0, 63]} + // Capacity: 62 + // # of read delays: 62 + fifo , 62> f; + inline hw_uint<32> peek(const int offset) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.peek(61 - offset); + } + + + + inline void push(const hw_uint<32> value) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + return f.push(value); + } + +}; + +struct I_cache { + I_write_0_merged_banks_19_cache I_write_0_merged_banks_19; + I_write_0_to_I_read_0_10_1_cache I_write_0_to_I_read_0_10_1; + I_write_0_to_I_read_0_11_5_cache I_write_0_to_I_read_0_11_5; + I_write_0_to_I_read_0_3_8_cache I_write_0_to_I_read_0_3_8; + I_write_0_to_I_read_0_3_9_cache I_write_0_to_I_read_0_3_9; + I_write_0_to_I_read_0_3_10_cache I_write_0_to_I_read_0_3_10; + I_write_0_to_I_read_0_4_12_cache I_write_0_to_I_read_0_4_12; + I_write_0_to_I_read_0_4_13_cache I_write_0_to_I_read_0_4_13; + I_write_0_to_I_read_0_5_16_cache I_write_0_to_I_read_0_5_16; + I_write_0_to_I_read_0_5_17_cache I_write_0_to_I_read_0_5_17; + I_write_0_to_I_read_0_6_20_cache I_write_0_to_I_read_0_6_20; + I_write_0_to_I_read_0_6_21_cache I_write_0_to_I_read_0_6_21; + I_write_0_to_I_read_0_6_23_cache I_write_0_to_I_read_0_6_23; + I_write_0_to_I_read_0_7_25_cache I_write_0_to_I_read_0_7_25; + I_write_0_to_I_read_0_8_29_cache I_write_0_to_I_read_0_8_29; + I_write_0_to_I_read_0_9_32_cache I_write_0_to_I_read_0_9_32; + I_write_0_to_I_read_0_9_33_cache I_write_0_to_I_read_0_9_33; + I_write_0_to_I_read_0_9_35_cache I_write_0_to_I_read_0_9_35; +}; + + + +inline void I_write_0_write(hw_uint<32> & I_write_0, I_cache& I, int root, int pr, int pc) { + I.I_write_0_merged_banks_19.push(I_write_0); + I.I_write_0_to_I_read_0_10_1.push(I_write_0); + I.I_write_0_to_I_read_0_11_5.push(I_write_0); + I.I_write_0_to_I_read_0_3_8.push(I_write_0); + I.I_write_0_to_I_read_0_3_9.push(I_write_0); + I.I_write_0_to_I_read_0_3_10.push(I_write_0); + I.I_write_0_to_I_read_0_4_12.push(I_write_0); + I.I_write_0_to_I_read_0_4_13.push(I_write_0); + I.I_write_0_to_I_read_0_5_16.push(I_write_0); + I.I_write_0_to_I_read_0_5_17.push(I_write_0); + I.I_write_0_to_I_read_0_6_20.push(I_write_0); + I.I_write_0_to_I_read_0_6_21.push(I_write_0); + I.I_write_0_to_I_read_0_6_23.push(I_write_0); + I.I_write_0_to_I_read_0_7_25.push(I_write_0); + I.I_write_0_to_I_read_0_8_29.push(I_write_0); + I.I_write_0_to_I_read_0_9_32.push(I_write_0); + I.I_write_0_to_I_read_0_9_33.push(I_write_0); + I.I_write_0_to_I_read_0_9_35.push(I_write_0); +} + +inline hw_uint<32> I_read_0_10_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_10 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 62 <= lr <= 63 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 60; read_0[root = 0, lr, lc] -> I[63, 1 + lr] : 0 <= lr <= 61 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 60; read_0[root, lr, lc] -> 64 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lc = 61 and 0 <= lr <= 61; read_0[root, lr, lc] -> (2 + lc) : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ ((-63 + lc == 0 && 61 - lr >= 0) || (61 - lc >= 0 && 61 - lr >= 0)) ? (64) : (-62 + lc == 0 && 61 - lr >= 0) ? (64) : (-62 + lr >= 0 && 60 - lc >= 0) ? ((61 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_11_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_11 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 61 <= lr <= 63 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 60; read_0[root = 0, lr, lc] -> I[63, 2 + lr] : 0 <= lr <= 60 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (-62 + lr >= 0 && 60 - lc >= 0) ? ((61 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_3_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_3 read pattern: { read_0[root = 0, lr, lc] -> I[lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 62; read_0[root = 0, lr = 63, lc] -> I[lc, 63] : 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, lr] : 0 <= lr <= 62; read_0[root = 0, lr = 63, lc = 63] -> I[63, 63] } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and lr = 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 128 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and lc = 63; read_0[root, lr, lc] -> 130 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 129 : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> (127 - lc) : root = 0 and lr = 62 and 0 <= lc <= 62 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_3_8.peek(/* one reader or all rams */ (-63 + lc == 0 && -62 + lr == 0) ? (64) : (-63 + lc == 0 && 61 - lr >= 0) ? (128) : (-62 + lc == 0 && 61 - lr >= 0) ? (129) : (61 - lc >= 0 && 61 - lr >= 0) ? (130) : (-63 + lr == 0 && 62 - lc >= 0) ? ((63 - lc)) : (-62 + lr == 0 && 62 - lc >= 0) ? ((127 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_4_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_4 read pattern: { read_0[root = 0, lr, lc] -> I[lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root = 0, lr, lc] -> I[lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, 63] : 62 <= lr <= 63; read_0[root = 0, lr, lc = 63] -> I[63, 1 + lr] : 0 <= lr <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 64 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 66 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 65 : root = 0 and lc = 62 and 0 <= lr <= 61 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_4_12.peek(/* one reader or all rams */ (-63 + lc == 0 && 61 - lr >= 0) ? (64) : (-62 + lc == 0 && 61 - lr >= 0) ? (65) : (61 - lc >= 0 && 61 - lr >= 0) ? (66) : (-62 + lr >= 0 && 62 - lc >= 0) ? ((63 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_5_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_5 read pattern: { read_0[root = 0, lr, lc] -> I[lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 62; read_0[root = 0, lr, lc] -> I[lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 62; read_0[root = 0, lr, lc = 63] -> I[63, 63] : 61 <= lr <= 63; read_0[root = 0, lr, lc = 63] -> I[63, 2 + lr] : 0 <= lr <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (63 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 62; read_0[root, lr, lc] -> 2 : root = 0 and lr = 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lr = 61 and lc = 62; read_0[root, lr, lc] -> 2 : root = 0 and 0 <= lr <= 60 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lc = 62 and 0 <= lr <= 60 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_5_16.peek(/* one reader or all rams */ (-62 + lc == 0 && 61 - lr >= 0) ? (1) : (61 - lc >= 0 && 61 - lr >= 0) ? (2) : (-62 + lr >= 0 && 62 - lc >= 0) ? ((63 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_6_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_6 read pattern: { read_0[root = 0, lr, lc] -> I[63, lr] : 0 <= lr <= 62 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 61; read_0[root = 0, lr = 63, lc] -> I[63, 63] : 62 <= lc <= 63; read_0[root = 0, lr = 63, lc] -> I[1 + lc, 63] : 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and lr = 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 128 : root = 0 and 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and 62 <= lc <= 63; read_0[root, lr, lc] -> 129 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> (126 - lc) : root = 0 and lr = 62 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_6_20.peek(/* one reader or all rams */ (-62 + lr == 0 && -62 + lc >= 0) ? (64) : (-62 + lc >= 0 && 61 - lr >= 0) ? (128) : (61 - lc >= 0 && 61 - lr >= 0) ? (129) : (-63 + lr == 0 && 61 - lc >= 0) ? ((62 - lc)) : (-62 + lr == 0 && 61 - lc >= 0) ? ((126 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_7_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_7 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 62 <= lr <= 63 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 63] : 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root = 0, lr, lc] -> I[63, 1 + lr] : 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 1 + lr] : 0 <= lr <= 61 and 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 64 : root = 0 and 0 <= lr <= 61 and 62 <= lc <= 63; read_0[root, lr, lc] -> 65 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (-62 + lc >= 0 && 61 - lr >= 0) ? (64) : (61 - lc >= 0 && 61 - lr >= 0) ? (65) : (-62 + lr >= 0 && 61 - lc >= 0) ? ((62 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_8_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_8 read pattern: { read_0[root = 0, lr, lc] -> I[63, 63] : 61 <= lr <= 63 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 63] : 61 <= lr <= 63 and 0 <= lc <= 61; read_0[root = 0, lr, lc] -> I[63, 2 + lr] : 0 <= lr <= 60 and 62 <= lc <= 63; read_0[root = 0, lr, lc] -> I[1 + lc, 2 + lr] : 0 <= lr <= 60 and 0 <= lc <= 61 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (62 - lc) : root = 0 and 62 <= lr <= 63 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and lr = 61 and 0 <= lc <= 61; read_0[root, lr, lc] -> 1 : root = 0 and 0 <= lr <= 60 and 0 <= lc <= 61 } + auto value_I_write_0 = I.I_write_0_merged_banks_19.peek(/* Needs general delay string */ (61 - lc >= 0 && 61 - lr >= 0) ? (1) : (-62 + lr >= 0 && 61 - lc >= 0) ? ((62 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +inline hw_uint<32> I_read_0_9_select(I_cache& I, int root, int lr, int lc) { +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + // I_read_0_9 read pattern: { read_0[root = 0, lr, lc] -> I[63, lr] : 0 <= lr <= 62 and 61 <= lc <= 63; read_0[root = 0, lr, lc] -> I[2 + lc, lr] : 0 <= lr <= 62 and 0 <= lc <= 60; read_0[root = 0, lr = 63, lc] -> I[63, 63] : 61 <= lc <= 63; read_0[root = 0, lr = 63, lc] -> I[2 + lc, 63] : 0 <= lc <= 60 } + // Read schedule : { read_0[root = 0, lr, lc] -> [2 + lr, 2 + lc, 1] : 0 <= lr <= 63 and 0 <= lc <= 63 } + // Write schedule: { write[root = 0, pr, pc] -> [pr, pc, 0] : 0 <= pr <= 63 and 0 <= pc <= 63 } + // DD fold: { read_0[root, lr, lc] -> (61 - lc) : root = 0 and lr = 63 and 0 <= lc <= 60; read_0[root, lr, lc] -> 128 : root = 0 and lc = 63 and 0 <= lr <= 61; read_0[root, lr, lc] -> 128 : root = 0 and lc = 61 and 0 <= lr <= 61; read_0[root, lr, lc] -> (66 + lc) : root = 0 and lc = 62 and 0 <= lr <= 61; read_0[root, lr, lc] -> 64 : root = 0 and lr = 62 and ((61 <= lc <= 62) or lc = 63); read_0[root, lr, lc] -> 128 : root = 0 and 0 <= lr <= 61 and 0 <= lc <= 60; read_0[root, lr, lc] -> (125 - lc) : root = 0 and lr = 62 and 0 <= lc <= 60 } + auto value_I_write_0 = I.I_write_0_to_I_read_0_9_32.peek(/* one reader or all rams */ (-62 + lr == 0 && -61 + lc >= 0) ? (64) : ((-63 + lc == 0 && 61 - lr >= 0) || (61 - lc >= 0 && 61 - lr >= 0)) ? (128) : (-63 + lr == 0 && 60 - lc >= 0) ? ((61 - lc)) : (-62 + lc == 0 && 61 - lr >= 0) ? (128) : (-62 + lr == 0 && 60 - lc >= 0) ? ((125 - lc)) : 0); + return value_I_write_0; +#ifndef __VIVADO_SYNTH__ + cout << "Error: Unsupported offsets: " << " root = " << root << " lr = " << lr << " lc = " << lc << endl; + assert(false); + return 0; +#endif //__VIVADO_SYNTH__ +} + +// # of bundles = 3 +// I_write_0 +// I_write_0 +inline void I_I_write_0_bundle_write(hw_uint<32>& I_write_0, I_cache& I, int root, int pr, int pc) { + hw_uint<32> I_write_0_res = I_write_0.extract<0, 31>(); + I_write_0_write(I_write_0_res, I, root, pr, pc); +} + +// read_0_read +// I_read_0_3 +// I_read_0_4 +// I_read_0_5 +// I_read_0_6 +// I_read_0_7 +// I_read_0_8 +// I_read_0_9 +// I_read_0_10 +// I_read_0_11 +inline hw_uint<288> I_read_0_read_bundle_read(I_cache& I, int root, int lr, int lc) { + // # of ports in bundle: 9 + // I_read_0_3 + // I_read_0_4 + // I_read_0_5 + // I_read_0_6 + // I_read_0_7 + // I_read_0_8 + // I_read_0_9 + // I_read_0_10 + // I_read_0_11 + + hw_uint<288> result; + hw_uint<32> I_read_0_3_res = I_read_0_3_select(I, root, lr, lc); + set_at<0, 288>(result, I_read_0_3_res); + hw_uint<32> I_read_0_4_res = I_read_0_4_select(I, root, lr, lc); + set_at<32, 288>(result, I_read_0_4_res); + hw_uint<32> I_read_0_5_res = I_read_0_5_select(I, root, lr, lc); + set_at<64, 288>(result, I_read_0_5_res); + hw_uint<32> I_read_0_6_res = I_read_0_6_select(I, root, lr, lc); + set_at<96, 288>(result, I_read_0_6_res); + hw_uint<32> I_read_0_7_res = I_read_0_7_select(I, root, lr, lc); + set_at<128, 288>(result, I_read_0_7_res); + hw_uint<32> I_read_0_8_res = I_read_0_8_select(I, root, lr, lc); + set_at<160, 288>(result, I_read_0_8_res); + hw_uint<32> I_read_0_9_res = I_read_0_9_select(I, root, lr, lc); + set_at<192, 288>(result, I_read_0_9_res); + hw_uint<32> I_read_0_10_res = I_read_0_10_select(I, root, lr, lc); + set_at<224, 288>(result, I_read_0_10_res); + hw_uint<32> I_read_0_11_res = I_read_0_11_select(I, root, lr, lc); + set_at<256, 288>(result, I_read_0_11_res); + return result; +} + +// write_write +// I_write_0 +inline void I_write_write_bundle_write(hw_uint<32>& write_write, I_cache& I, int root, int pr, int pc) { + hw_uint<32> I_write_0_res = write_write.extract<0, 31>(); + I_write_0_write(I_write_0_res, I, root, pr, pc); +} + + + +// Operation logic +inline void write(HWStream >& /* buffer_args num ports = 1 */in, I_cache& I, int root, int pr, int pc) { + // Consume: in + auto in_pc_c__pr_value = in.read(); + // Produce: I + I_write_write_bundle_write(in_pc_c__pr_value, I, root, pr, pc); + +#ifndef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + +} + +inline void read_0(HWStream >& /* buffer_args num ports = 1 */out, int root, int lr, int lc) { + auto compute_result = conv_3_3(); + // Produce: out + out.write(compute_result); + +#ifndef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ + +} + +// Driver function +void conv_2d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out, int num_epochs) { + +#ifndef __VIVADO_SYNTH__ + ofstream debug_file("conv_2d_bc_debug.csv"); + global_debug_handle = &debug_file; +#endif //__VIVADO_SYNTH__ + I_cache I; +#ifdef __VIVADO_SYNTH__ +#endif //__VIVADO_SYNTH__ +#ifdef __VIVADO_SYNTH__ +#pragma HLS inline recursive +#endif // __VIVADO_SYNTH__ + + for (int epoch = 0; epoch < num_epochs; epoch++) { + for (int c0 = 0; c0 <= 65; c0 += 1) { + if (c0 >= 2) { + if (c0 <= 63) + for (int c1 = 0; c1 <= 1; c1 += 1) + write(in, I, 0, c0, c1); + for (int c1 = 2; c1 <= 65; c1 += 1) { + if (c0 <= 63 && c1 <= 63) + write(in, I, 0, c0, c1); + read_0(I, out, 0, c0 - 2, c1 - 2); + } + } else { + for (int c1 = 0; c1 <= 63; c1 += 1) + write(in, I, 0, c0, c1); + } + } + + } + +#ifndef __VIVADO_SYNTH__ + debug_file.close(); +#endif //__VIVADO_SYNTH__ +} + +void conv_2d_bc(HWStream >& /* no bundle get_args num ports = 1 */in, HWStream >& /* no bundle get_args num ports = 1 */out) { + + conv_2d_bc(in, out, 1); +} +#ifdef __VIVADO_SYNTH__ +const int write_read_num_transfers = 0; +const int read_0_write_num_transfers = 0; + + +extern "C" { + +static void read_write_read(hw_uint<32>* input, HWStream >& v, const int size) { + hw_uint<32> burst_reg; + int num_transfers = write_read_num_transfers*size; + for (int i = 0; i < num_transfers; i++) { + #pragma HLS pipeline II=1 + burst_reg = input[i]; + v.write(burst_reg); + } +} + +static void write_read_0_write(hw_uint<32>* output, HWStream >& v, const int size) { + hw_uint<32> burst_reg; + int num_transfers = read_0_write_num_transfers*size; + for (int i = 0; i < num_transfers; i++) { + #pragma HLS pipeline II=1 + burst_reg = v.read(); + output[i] = burst_reg; + } +} + +void conv_2d_bc_accel(hw_uint<32>* write_read, hw_uint<32>* read_0_write, const int size) { +#pragma HLS dataflow +#pragma HLS INTERFACE m_axi port = write_read offset = slave depth = 65536 bundle = gmem0 +#pragma HLS INTERFACE m_axi port = read_0_write offset = slave depth = 65536 bundle = gmem1 + +#pragma HLS INTERFACE s_axilite port = write_read bundle = control +#pragma HLS INTERFACE s_axilite port = read_0_write bundle = control +#pragma HLS INTERFACE s_axilite port = size bundle = control +#pragma HLS INTERFACE s_axilite port = return bundle = control + + static HWStream > write_read_channel; + static HWStream > read_0_write_channel; + + read_write_read(write_read, write_read_channel, size); + + conv_2d_bc(write_read_channel, read_0_write_channel, size); + + write_read_0_write(read_0_write, read_0_write_channel, size); +} + +} +#endif //__VIVADO_SYNTH__ + From ffe548aca0dd811c25bf5fd36a923cd21dc72714 Mon Sep 17 00:00:00 2001 From: Kavya Sreedhar Date: Sat, 27 Jun 2020 10:57:17 -0700 Subject: [PATCH 33/33] clean up --- build_set_test.cpp | 31 --------------- prog.cpp | 1 - ubuffer.cpp | 98 +++++++++++++++++++++++----------------------- ubuffer.h | 15 +------ 4 files changed, 52 insertions(+), 93 deletions(-) diff --git a/build_set_test.cpp b/build_set_test.cpp index 813cf94f8..e53b2e606 100644 --- a/build_set_test.cpp +++ b/build_set_test.cpp @@ -1001,37 +1001,6 @@ void conv_1d_bc_test() { assert(res == 0); } -prog conv_1d_bc_mirror() { - prog prg; - prg.compute_unit_file = "accumulate_3.h"; - prg.name = "conv_1d_bc"; - prg.add_input("in"); - prg.add_output("out"); - prg.buffer_port_widths["in"] = 32; - prg.buffer_port_widths["out"] = 32; - prg.buffer_port_widths["M"] = 32; - - auto p = prg.add_loop("p", 0, 10); - auto write = p->add_op("get_input"); - write->add_load("in", "p"); - write->add_store("M", "p"); - - auto c = prg.add_loop("c", 0, 10); - auto compute = c->add_op("compute_output"); - compute->add_function("accumulate_3"); - /*compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}}); - compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}}); - compute->add_load("M", {{"c < 2", "0"}, {"2 <= c <= 7", "c"}, {"7 < c <= 8", "9"}, {"c > 8", "8"}});*/ - compute->add_load("M", {{"0 <= c < 9", "c"}, {"c >= 9", "9"}}); - compute->add_load("M", {{"0 <= c < 8", "c + 1"}, {"c >= 8", "9"}}); - compute->add_load("M", {{"0 <= c < 7", "c + 2"}, {"c >= 7", "9"}}); -/* compute->add_load("M", "min(c, 9)"); - compute->add_load("M", "min(c + 1, 9)"); - compute->add_load("M", "min(c + 2, 9)");*/ - compute->add_store("out", "c"); - return prg; -} - prog conv_1d_bc() { prog prg; prg.compute_unit_file = "accumulate_3.h"; diff --git a/prog.cpp b/prog.cpp index 03596f5bc..3933c7718 100644 --- a/prog.cpp +++ b/prog.cpp @@ -1517,7 +1517,6 @@ module_type* generate_rtl_buffer(CodegenOptions& options, UBuffer& buffer) { minihls::block* blk = minigen.add_block(buffer.name); - cout << "&&&&&&&&&&&&&&&& prog.cpp" << endl; for (auto bank_struct : buffer.get_banks()) { auto bankprog = minigen.add_block(bank_struct.name); diff --git a/ubuffer.cpp b/ubuffer.cpp index ddfe2fe33..53eb8f3cc 100644 --- a/ubuffer.cpp +++ b/ubuffer.cpp @@ -227,15 +227,15 @@ void generate_bank(CodegenOptions& options, out << "\t// # of read delays: " << read_delays.size() << endl; read_delays = sort_unique(read_delays); - cout << "PEEK num readers " << num_readers << endl; - cout << "PEEK options.all_rams " << options.all_rams << endl; + // cout << "PEEK num readers " << num_readers << endl; + // cout << "PEEK options.all_rams " << options.all_rams << endl; if (num_readers == 1 || options.all_rams) { int partition_capacity = 1 + maxdelay; out << "\tfifo<" << pt_type_string << ", " << partition_capacity << "> f" << ";" << endl; - cout << "peek1" << endl; + // cout << "peek1" << endl; out << "\tinline " + pt_type_string + " peek(const int offset) {" << endl; ignore_inter_deps(out, "f"); - cout << "peek2" << endl; + // cout << "peek2" << endl; out << tab(2) << "return f.peek(" << partition_capacity - 1 << " - offset);" << endl; out << tab(1) << "}" << endl << endl; @@ -289,7 +289,7 @@ void generate_bank(CodegenOptions& options, //} //assert(capacities.size() == partitions.size()); -cout<<"num partitions "<= 0); - cout << "peek3" << endl; + // cout << "peek3" << endl; out << "\tinline " << pt_type_string << " peek_" << to_string(dv) << "() {" << endl; if (capacity > 1) { ignore_inter_deps(out, p.first); @@ -523,19 +523,20 @@ void generate_code_prefix(CodegenOptions& options, UBuffer& buf) { //banking and merge pass - cout << "before generate bank and merge " << endl; + // cout << "before generate bank and merge " << endl; buf.generate_bank_and_merge(options); //string inpt = buf.get_in_port(); out << "#include \"hw_classes.h\"" << endl << endl; cout << "before get banks " << endl; for (auto b : buf.get_banks()) { -cout << "BANK NAME " << b.name << endl; -cout<< "BANK MERGED READERS " << b.num_readers << endl; + // cout << "BANK NAME " << b.name << endl; + // cout<< "BANK MERGED READERS " << b.num_readers << endl; generate_bank(options, out, b); } out << "struct " << buf.name << "_cache {" << endl; + for (auto b : buf.get_banks()) { out << tab(1) << b.name << "_cache " @@ -567,7 +568,7 @@ cout<< "BANK MERGED READERS " << b.num_readers << endl; } else if (options.inner_bank_offset_mode == INNER_BANK_OFFSET_LINEAR) { string linear_addr = buf.generate_linearize_ram_addr(inpt); - //cout <<"Input port:" << inpt << ", Get ram string: " << linear_addr << endl; + cout <<"Input port:" << inpt << ", Get ram string: " << linear_addr << endl; out << tab(1) << buf.name << "." << sb.name << ".write(" << inpt << ", " << linear_addr << ");" << endl; } @@ -591,7 +592,8 @@ cout<< "BANK MERGED READERS " << b.num_readers << endl; auto pp = isl_pw_qpolynomial_intersect_domain(isl_pw_qpolynomial_from_qpolynomial(cpy(p.second)), cpy(p.first)); pieces_dom = unn(pieces_dom, to_uset(p.first)); } -cout<<"DOMAIN "<= 0); -cout << "peek5" << endl; + // cout << "peek5" << endl; value_str = bank + ".peek_" + dx + "()"; } else { -cout << "peek6" << endl; + // cout << "peek6" << endl; value_str = bank + ".peek" + "( /* is opt const */ " + delay_expr + ")"; } } else if (pieces.size() == 0 && !options.all_rams) { -cout << "peek7" << endl; + // cout << "peek7" << endl; value_str = bank + ".peek_0()"; } else if (pieces.size() == 1 && isl_set_is_subset(cpy(out_domain), cpy(pieces[0].first))) { string dx = codegen_c(pieces[0].second); if (!options.all_rams && is_number(dx)) { assert(safe_stoi(dx) >= 0); -cout << "peek8" << endl; + // cout << "peek8" << endl; value_str = bank + ".peek_" + dx + "()"; } else { -cout << "peek9" << endl; + // cout << "peek9" << endl; value_str = bank + ".peek" + "(/* is one piece but not a number */" + dx + ")"; } } else { -cout << "peek10" << endl; + // cout << "peek10" << endl; value_str = bank + ".peek" + "(/* Needs general delay string */ " + delay_expr + ")"; } } -cout<<"value_str "< sv_domain; - // string port name, piecewise - // compute operations -> addresses (multi D) std::map access_map; std::map schedule; std::map > port_bundles; - // input port -> output port bank - // input ports -> switching networks that set inputs to banks, array of - // banks , output ports - // generalize this mapping -> mixing + matching - // addressing scheme: bank index, addr in bank - // indices on domain of access map -> bank nums it's reading - // generalize data structure - // pair of ports + piece of access pattern - // 1 bank bw input + output for each piece map, std::vector > stack_banks; map selectors; @@ -1014,7 +1003,7 @@ class UBuffer { std::set done; for (auto b : stack_banks) { for (auto b_ : b.second) { - cout << "bank name " << b_.name << endl; + cout << "bank name " << b_.name << endl; if (!elem(b_.name, done)){ bnk.push_back(b_); done.insert(b_.name); @@ -1058,7 +1047,7 @@ class UBuffer { for (auto bs : stack_banks) { if (bs.first.first == inpt && bs.first.second == outpt) { auto first_bank = bs.second[0]; -cout << "first bank name " << first_bank.name << endl; + cout << "first bank name " << first_bank.name << endl; return first_bank.name; } }