@@ -112,6 +112,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
112112 , placer_opts_(placer_opts) {
113113 const int num_layers = g_vpr_ctx.device ().grid .get_num_layers ();
114114
115+ is_multi_layer_ = num_layers > 1 ;
116+
115117 // Either 3D BB or per layer BB data structure are used, not both.
116118 if (cube_bb_) {
117119 ts_bb_edge_new_.resize (num_nets, t_bb ());
@@ -145,10 +147,11 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
145147 * been recomputed. */
146148 bb_update_status_.resize (num_nets, NetUpdateState::NOT_UPDATED_YET);
147149
148- alloc_and_load_chan_w_factors_for_place_cost_ (placer_opts_. place_cost_exp );
150+ alloc_and_load_chan_w_factors_for_place_cost_ ();
149151}
150152
151- void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_ (float place_cost_exp) {
153+ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_ () {
154+ const double place_cost_exp = static_cast <double >(placer_opts_.place_cost_exp );
152155 auto & device_ctx = g_vpr_ctx.device ();
153156
154157 const int grid_height = device_ctx.grid .height ();
@@ -190,7 +193,7 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
190193 }
191194
192195 chanx_place_cost_fac_[high][low] = (high - low + 1 .) / chanx_place_cost_fac_[high][low];
193- chanx_place_cost_fac_[high][low] = pow ((double )chanx_place_cost_fac_[high][low], ( double ) place_cost_exp);
196+ chanx_place_cost_fac_[high][low] = pow ((double )chanx_place_cost_fac_[high][low], place_cost_exp);
194197 }
195198 }
196199
@@ -220,71 +223,87 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
220223 }
221224
222225 chany_place_cost_fac_[high][low] = (high - low + 1 .) / chany_place_cost_fac_[high][low];
223- chany_place_cost_fac_[high][low] = pow ((double )chany_place_cost_fac_[high][low], ( double ) place_cost_exp);
226+ chany_place_cost_fac_[high][low] = pow ((double )chany_place_cost_fac_[high][low], place_cost_exp);
224227 }
225228 }
226229
227- if (device_ctx. grid . get_num_layers () > 1 ) {
228- alloc_and_load_for_fast_vertical_cost_update_ (place_cost_exp );
230+ if (is_multi_layer_ ) {
231+ alloc_and_load_for_fast_vertical_cost_update_ ();
229232 }
230233}
231234
232- void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_ (float place_cost_exp ) {
235+ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_ () {
233236 const auto & device_ctx = g_vpr_ctx.device ();
234237 const auto & rr_graph = device_ctx.rr_graph ;
235238
236239 const size_t grid_height = device_ctx.grid .height ();
237240 const size_t grid_width = device_ctx.grid .width ();
238241
239242
240- chanz_place_cost_fac_ = vtr::NdMatrix<float , 4 >({grid_width, grid_height, grid_width, grid_height}, 0 .);
243+ acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int , 2 >({grid_width, grid_height}, 0 .);
244+
245+ vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
241246
242- vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
247+ /*
248+ * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
249+ * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
250+ * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
251+ * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
252+ * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
253+ * if someday we have architectures with widely varying connectivity between different layers in a stack.
254+ */
243255
256+ /*
257+ * To calculate the accumulative number of inter-die connections we first need to get the number of
258+ * inter-die connection per location. To be able to work for the cases that RR Graph is read instead
259+ * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once
260+ * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
261+ * we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
262+ * inter-die connections by adding up the number of inter-die block at that location + the accumulation
263+ * for the block below and left to it. Then, since the accumulated number of inter-die connection to
264+ * the block on the lower left connection of the block is added twice, that part needs to be removed.
265+ */
244266 for (const auto & src_rr_node : rr_graph.nodes ()) {
245- for (const auto & rr_edge_idx : rr_graph.configurable_edges (src_rr_node)) {
267+ for (const auto & rr_edge_idx : rr_graph.edges (src_rr_node)) {
246268 const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
247269 if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
248270 // We assume that the nodes driving the inter-layer connection or being driven by it
249- // are not streched across multiple tiles
271+ // are not stretched across multiple tiles
250272 int src_x = rr_graph.node_xhigh (src_rr_node);
251273 int src_y = rr_graph.node_yhigh (src_rr_node);
252274 VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
253275
254276 tile_num_inter_die_conn[src_x][src_y]++;
255277 }
256278 }
279+ }
257280
258- for (const auto & rr_edge_idx : rr_graph.non_configurable_edges (src_rr_node)) {
259- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
260- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
261- int src_x = rr_graph.node_xhigh (src_rr_node);
262- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_xlow (src_rr_node) == src_x);
263- int src_y = rr_graph.node_yhigh (src_rr_node);
264- VTR_ASSERT (rr_graph.node_ylow (src_rr_node) == src_y && rr_graph.node_ylow (src_rr_node) == src_y);
265- tile_num_inter_die_conn[src_x][src_y]++;
266- }
281+ int num_layers = device_ctx.grid .get_num_layers ();
282+ for (size_t x = 0 ; x < device_ctx.grid .width (); x++) {
283+ for (size_t y = 0 ; y < device_ctx.grid .height (); y++) {
284+ tile_num_inter_die_conn[x][y] /= (num_layers-1 );
267285 }
268286 }
269287
270- for (int x_high = 0 ; x_high < (int )device_ctx.grid .width (); x_high++) {
271- for (int y_high = 0 ; y_high < (int )device_ctx.grid .height (); y_high++) {
272- for (int x_low = 0 ; x_low <= x_high; x_low++) {
273- for (int y_low = 0 ; y_low <= y_high; y_low++) {
274- int num_inter_die_conn = 0 ;
275- for (int x = x_low; x <= x_high; x++) {
276- for (int y = y_low; y <= y_high; y++) {
277- num_inter_die_conn += tile_num_inter_die_conn[x][y];
278- }
279- }
280- int seen_num_tiles = (x_high - x_low + 1 ) * (y_high - y_low + 1 );
281- chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast <float >(num_inter_die_conn);
282-
283- chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow (
284- (double )chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
285- (double )place_cost_exp);
286- }
287- }
288+ // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y).
289+ acc_tile_num_inter_die_conn_[0 ][0 ] = tile_num_inter_die_conn[0 ][0 ];
290+ // Initialize the first row and column
291+ for (size_t x = 1 ; x < device_ctx.grid .width (); x++) {
292+ acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] +
293+ tile_num_inter_die_conn[x][0 ];
294+ }
295+
296+ for (size_t y = 1 ; y < device_ctx.grid .height (); y++) {
297+ acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] +
298+ tile_num_inter_die_conn[0 ][y];
299+ }
300+
301+ for (size_t x_high = 1 ; x_high < device_ctx.grid .width (); x_high++) {
302+ for (size_t y_high = 1 ; y_high < device_ctx.grid .height (); y_high++) {
303+ acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] +
304+ acc_tile_num_inter_die_conn_[x_high][y_high-1 ] +
305+ tile_num_inter_die_conn[x_high][y_high] -
306+ acc_tile_num_inter_die_conn_[x_high-1 ][y_high-1 ];
288307 }
289308 }
290309}
@@ -818,7 +837,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
818837 }
819838
820839 /* Now account for the layer motion. */
821- if (num_layers > 1 ) {
840+ if (is_multi_layer_ ) {
822841 /* We need to update it only if multiple layers are available */
823842 for (int layer_num = 0 ; layer_num < num_layers; layer_num++) {
824843 num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
@@ -1402,8 +1421,6 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14021421
14031422 const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move ().bb_coords [net_id];
14041423
1405- const bool is_multi_layer = (g_vpr_ctx.device ().grid .get_num_layers () > 1 );
1406-
14071424 double crossing = wirelength_crossing_count (cluster_ctx.clb_nlist .net_pins (net_id).size ());
14081425
14091426 /* Could insert a check for xmin == xmax. In that case, assume *
@@ -1420,12 +1437,14 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14201437 */
14211438
14221439 double ncost;
1423- ncost = (bb.xmax - bb.xmin + 1 ) * crossing * chanx_place_cost_fac_[bb.ymax ][bb.ymin - 1 ];
1424- ncost += (bb.ymax - bb.ymin + 1 ) * crossing * chany_place_cost_fac_[bb.xmax ][bb.xmin - 1 ];
1425- if (is_multi_layer ) {
1426- ncost += (bb.layer_max - bb.layer_min ) * crossing * chanz_place_cost_fac_[bb. xmax ][bb. ymax ][bb. xmin ][bb. ymin ] ;
1440+ ncost = (bb.xmax - bb.xmin + 1 ) * chanx_place_cost_fac_[bb.ymax ][bb.ymin - 1 ];
1441+ ncost += (bb.ymax - bb.ymin + 1 ) * chany_place_cost_fac_[bb.xmax ][bb.xmin - 1 ];
1442+ if (is_multi_layer_ ) {
1443+ ncost += (bb.layer_max - bb.layer_min ) * get_chanz_cost_factor_ (bb) ;
14271444 }
14281445
1446+ ncost *= crossing;
1447+
14291448 return ncost;
14301449}
14311450
@@ -1526,6 +1545,39 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) {
15261545 return ncost;
15271546}
15281547
1548+ float NetCostHandler::get_chanz_cost_factor_ (const t_bb& bb) {
1549+ float place_cost_exp = placer_opts_.place_cost_exp ;
1550+
1551+ int num_inter_dir_conn;
1552+
1553+ if (bb.xmin == 0 && bb.ymin == 0 ) {
1554+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ];
1555+ } else if (bb.xmin == 0 ) {
1556+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1557+ acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymin -1 ];
1558+ } else if (bb.ymin == 0 ) {
1559+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1560+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymax ];
1561+ } else {
1562+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymax ] -
1563+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymax ] -
1564+ acc_tile_num_inter_die_conn_[bb.xmax ][bb.ymin -1 ] +
1565+ acc_tile_num_inter_die_conn_[bb.xmin -1 ][bb.ymin -1 ];
1566+ }
1567+
1568+ float z_cost_factor;
1569+ if (num_inter_dir_conn == 0 ) {
1570+ return 1 .0f ;
1571+ } else {
1572+ int bb_num_tiles = (bb.xmax - bb.xmin + 1 ) * (bb.ymax - bb.ymin + 1 );
1573+ z_cost_factor = bb_num_tiles / static_cast <float >(num_inter_dir_conn);
1574+ z_cost_factor = pow ((double )z_cost_factor, (double )place_cost_exp);
1575+ }
1576+
1577+ return z_cost_factor;
1578+
1579+ }
1580+
15291581double NetCostHandler::recompute_bb_cost_ () {
15301582 double cost = 0 ;
15311583
0 commit comments