Skip to content

Commit 3b18dea

Browse files
Merge branch 'master' into temp_remove_redundant_strong_odin_tests and resolve conflicts
2 parents 1c6ee56 + 7f0839d commit 3b18dea

File tree

34 files changed

+619
-561
lines changed

34 files changed

+619
-561
lines changed

vpr/src/base/stats.cpp

Lines changed: 119 additions & 116 deletions
Large diffs are not rendered by default.

vpr/src/base/stats.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,6 @@ void routing_stats(const Netlist<>& net_list,
2424
RRSwitchId wire_to_ipin_switch,
2525
bool is_flat);
2626

27-
/**
28-
* @brief Calculates the routing channel width at each grid location.
29-
*
30-
* Iterates through all RR nodes and counts how many wires pass through each (x, y) location
31-
* for both horizontal (CHANX) and vertical (CHANY) channels.
32-
*
33-
* @return A pair of 3D matrices:
34-
* - First: CHANX width per [layer][x][y]
35-
* - Second: CHANY width per [layer][x][y]
36-
*/
37-
std::pair<vtr::NdMatrix<int, 3>, vtr::NdMatrix<int, 3>> calculate_channel_width();
38-
3927
void print_wirelen_prob_dist(bool is_flat);
4028

4129
void print_lambda();

vpr/src/base/vpr_context.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,16 @@ struct DeviceContext : public Context {
263263

264264
int delayless_switch_idx = UNDEFINED;
265265

266+
/// Stores the number of CHANX wire segments in each routing channel segment at [layer][x][y]
267+
vtr::NdMatrix<int, 3> rr_chanx_segment_width;
268+
/// Stores the number of CHANY wire segments in each routing channel segment at [layer][x][y]
269+
vtr::NdMatrix<int, 3> rr_chany_segment_width;
270+
271+
/// Stores the maximum channel segment width in each horizontal channel
272+
std::vector<int> rr_chanx_width;
273+
/// Stores the maximum channel segment width in each vertical channel
274+
std::vector<int> rr_chany_width;
275+
266276
bool rr_graph_is_flat = false;
267277

268278
/*

vpr/src/draw/draw.cpp

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -437,16 +437,18 @@ void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) {
437437
t_draw_state* draw_state = get_draw_state_vars();
438438
t_draw_coords* draw_coords = get_draw_coords_vars();
439439
const DeviceContext& device_ctx = g_vpr_ctx.device();
440+
const DeviceGrid& grid = device_ctx.grid;
440441
const RRGraphView& rr_graph = device_ctx.rr_graph;
441442

442443
/* Store a reference to block location variables so that other drawing
443444
* functions can access block location information without accessing
444445
* the global placement state, which is inaccessible during placement.*/
445446
draw_state->set_graphics_blk_loc_registry_ref(blk_loc_registry);
446447

447-
if (!draw_state->show_graphics && !draw_state->save_graphics
448-
&& draw_state->graphics_commands.empty())
449-
return; //do not initialize only if --disp off and --save_graphics off
448+
// do not initialize only if --disp off and --save_graphics off
449+
if (!draw_state->show_graphics && !draw_state->save_graphics && draw_state->graphics_commands.empty()) {
450+
return;
451+
}
450452

451453
/* Each time routing is on screen, need to reallocate the color of each *
452454
* rr_node, as the number of rr_nodes may change. */
@@ -468,32 +470,28 @@ void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) {
468470
}
469471

470472
size_t j = 0;
471-
for (size_t i = 0; i < (device_ctx.grid.width() - 1); i++) {
473+
for (size_t i = 0; i < grid.width() - 1; i++) {
472474
draw_coords->tile_x[i] = (i * draw_coords->get_tile_width()) + j;
473-
j += device_ctx.chan_width.y_list[i] + 1; /* N wires need N+1 units of space */
475+
j += device_ctx.rr_chany_width[i] + 1; // N wires need N+1 units of space
474476
}
475-
draw_coords->tile_x[device_ctx.grid.width() - 1] = ((device_ctx.grid.width()
476-
- 1)
477-
* draw_coords->get_tile_width())
478-
+ j;
477+
draw_coords->tile_x[grid.width() - 1] = (grid.width() - 1) * draw_coords->get_tile_width() + j;
478+
479479
j = 0;
480-
for (size_t i = 0; i < (device_ctx.grid.height() - 1); ++i) {
480+
for (size_t i = 0; i < device_ctx.grid.height() - 1; ++i) {
481481
draw_coords->tile_y[i] = (i * draw_coords->get_tile_width()) + j;
482-
j += device_ctx.chan_width.x_list[i] + 1;
482+
j += device_ctx.rr_chanx_width[i] + 1;
483483
}
484-
draw_coords->tile_y[device_ctx.grid.height() - 1] = ((device_ctx.grid.height() - 1) * draw_coords->get_tile_width())
485-
+ j;
484+
draw_coords->tile_y[grid.height() - 1] = (grid.height() - 1) * draw_coords->get_tile_width() + j;
485+
486486
/* Load coordinates of sub-blocks inside the clbs */
487487
draw_internal_init_blk();
488488
//Margin beyond edge of the drawn device to extend the visible world
489489
//Setting this to > 0.0 means 'Zoom Fit' leave some fraction of white
490490
//space around the device edges
491491
constexpr float VISIBLE_MARGIN = 0.01;
492492

493-
float draw_width = draw_coords->tile_x[device_ctx.grid.width() - 1]
494-
+ draw_coords->get_tile_width();
495-
float draw_height = draw_coords->tile_y[device_ctx.grid.height() - 1]
496-
+ draw_coords->get_tile_width();
493+
float draw_width = draw_coords->tile_x[grid.width() - 1] + draw_coords->get_tile_width();
494+
float draw_height = draw_coords->tile_y[grid.height() - 1] + draw_coords->get_tile_width();
497495

498496
initial_world = ezgl::rectangle(
499497
{-VISIBLE_MARGIN * draw_width, -VISIBLE_MARGIN * draw_height},

vpr/src/place/net_cost_handler.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -143,33 +143,34 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
143143
const size_t grid_height = device_ctx.grid.height();
144144
const size_t grid_width = device_ctx.grid.width();
145145

146-
/* These arrays contain accumulative channel width between channel zero and
147-
* the channel specified by the given index. The accumulated channel width
148-
* is inclusive, meaning that it includes both channel zero and channel `idx`.
149-
* To compute the total channel width between channels 'low' and 'high', use the
150-
* following formula:
151-
* acc_chan?_width_[high] - acc_chan?_width_[low - 1]
152-
* This returns the total number of tracks between channels 'low' and 'high',
153-
* including tracks in these channels.
154-
*/
146+
// These arrays contain accumulative channel width between channel zero and
147+
// the channel specified by the given index. The accumulated channel width
148+
// is inclusive, meaning that it includes both channel zero and channel `idx`.
149+
// To compute the total channel width between channels 'low' and 'high', use the
150+
// following formula:
151+
// acc_chan?_width_[high] - acc_chan?_width_[low - 1]
152+
// This returns the total number of tracks between channels 'low' and 'high',
153+
// including tracks in these channels.
155154
acc_chanx_width_ = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept {
156-
int chan_x_width = device_ctx.chan_width.x_list[y];
155+
int chan_x_width = device_ctx.rr_chanx_width[y];
157156

158-
/* If the number of tracks in a channel is zero, two consecutive elements take the same
159-
* value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
160-
* potential issue, we assume that the channel width is at least 1.
161-
*/
162-
if (chan_x_width == 0)
157+
// If the number of tracks in a channel is zero, two consecutive elements take the same
158+
// value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
159+
// potential issue, we assume that the channel width is at least 1.
160+
if (chan_x_width == 0) {
163161
return 1;
162+
}
164163

165164
return chan_x_width;
166165
});
166+
167167
acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
168-
int chan_y_width = device_ctx.chan_width.y_list[x];
168+
int chan_y_width = device_ctx.rr_chany_width[x];
169169

170170
// to avoid a division by zero
171-
if (chan_y_width == 0)
171+
if (chan_y_width == 0) {
172172
return 1;
173+
}
173174

174175
return chan_y_width;
175176
});
@@ -1819,7 +1820,8 @@ std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> NetCostHandler::es
18191820
}
18201821
}
18211822

1822-
const auto [chanx_width, chany_width] = calculate_channel_width();
1823+
const vtr::NdMatrix<int, 3>& chanx_width = device_ctx.rr_chanx_segment_width;
1824+
const vtr::NdMatrix<int, 3>& chany_width = device_ctx.rr_chany_segment_width;
18231825

18241826
VTR_ASSERT(chanx_util.size() == chany_util.size());
18251827
VTR_ASSERT(chanx_util.ndims() == chany_util.ndims());

vpr/src/route/rr_graph_generation/rr_graph.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,14 @@ static void build_rr_graph(e_graph_type graph_type,
376376
static int get_delayless_switch_id(const t_det_routing_arch& det_routing_arch,
377377
bool load_rr_graph);
378378

379+
/**
380+
* @brief Calculates the routing channel width at each grid location.
381+
*
382+
* Iterates through all RR nodes and counts how many wires pass through each (layer, x, y) location
383+
* for both horizontal (CHANX) and vertical (CHANY) channels.
384+
*/
385+
static void alloc_and_init_channel_width();
386+
379387
/******************* Subroutine definitions *******************************/
380388

381389
void create_rr_graph(e_graph_type graph_type,
@@ -533,6 +541,8 @@ void create_rr_graph(e_graph_type graph_type,
533541
device_ctx.rr_graph.rr_nodes(),
534542
is_flat);
535543

544+
alloc_and_init_channel_width();
545+
536546
print_rr_graph_stats();
537547

538548
// Write out rr graph file if needed - Currently, writing the flat rr-graph is not supported since loading from a flat rr-graph is not supported.
@@ -1116,6 +1126,53 @@ static int get_delayless_switch_id(const t_det_routing_arch& det_routing_arch,
11161126
return delayless_switch;
11171127
}
11181128

1129+
static void alloc_and_init_channel_width() {
1130+
DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device();
1131+
const DeviceGrid& grid = mutable_device_ctx.grid;
1132+
const auto& rr_graph = mutable_device_ctx.rr_graph;
1133+
1134+
vtr::NdMatrix<int, 3>& chanx_width = mutable_device_ctx.rr_chanx_segment_width;
1135+
vtr::NdMatrix<int, 3>& chany_width = mutable_device_ctx.rr_chany_segment_width;
1136+
1137+
chanx_width.resize({grid.get_num_layers(), grid.width(), grid.height()});
1138+
chany_width.resize({grid.get_num_layers(), grid.width(), grid.height()});
1139+
1140+
chanx_width.fill(0);
1141+
chany_width.fill(0);
1142+
1143+
for (RRNodeId node_id : rr_graph.nodes()) {
1144+
e_rr_type rr_type = rr_graph.node_type(node_id);
1145+
1146+
if (rr_type == e_rr_type::CHANX) {
1147+
int y = rr_graph.node_ylow(node_id);
1148+
int layer = rr_graph.node_layer_low(node_id);
1149+
for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) {
1150+
chanx_width[layer][x][y] += rr_graph.node_capacity(node_id);
1151+
}
1152+
} else if (rr_type == e_rr_type::CHANY) {
1153+
int x = rr_graph.node_xlow(node_id);
1154+
int layer = rr_graph.node_layer_low(node_id);
1155+
for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) {
1156+
chany_width[layer][x][y] += rr_graph.node_capacity(node_id);
1157+
}
1158+
}
1159+
}
1160+
1161+
std::vector<int>& chanx_width_list = mutable_device_ctx.rr_chanx_width;
1162+
std::vector<int>& chany_width_list = mutable_device_ctx.rr_chany_width;
1163+
1164+
chanx_width_list.resize(grid.height());
1165+
chany_width_list.resize(grid.width());
1166+
1167+
std::ranges::fill(chanx_width_list, 0);
1168+
std::ranges::fill(chany_width_list, 0);
1169+
1170+
for (t_physical_tile_loc loc : grid.all_locations()) {
1171+
chanx_width_list[loc.y] = std::max(chanx_width[loc.layer_num][loc.x][loc.y], chanx_width_list[loc.y]);
1172+
chany_width_list[loc.x] = std::max(chany_width[loc.layer_num][loc.x][loc.y], chany_width_list[loc.x]);
1173+
}
1174+
}
1175+
11191176
void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder,
11201177
const t_det_routing_arch& det_routing_arch,
11211178
t_physical_tile_type_ptr physical_tile,
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time
2-
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.07 vpr 64.88 MiB -1 -1 0.15 28236 3 0.06 -1 -1 36544 -1 -1 72 99 1 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 66436 99 130 353 483 1 222 302 13 13 169 clb auto 25.3 MiB 0.03 1748.73 707 29650 4654 11713 13283 64.9 MiB 0.02 0.00 26 1506 9 3.33e+06 2.28e+06 360896. 2135.48 0.38
3-
k4_N10_memSize16384_memData64.xml diffeq1.v common 2.66 vpr 67.86 MiB -1 -1 0.19 32844 23 0.23 -1 -1 37316 -1 -1 74 162 0 5 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 69484 162 96 1186 1127 1 667 337 13 13 169 clb auto 28.2 MiB 0.10 7906.16 4859 81205 21212 54650 5343 67.9 MiB 0.08 0.00 50 9091 14 3.33e+06 2.67e+06 641417. 3795.37 1.37
4-
k4_N10_memSize16384_memData64.xml single_wire.v common 0.31 vpr 62.98 MiB -1 -1 0.05 25804 1 0.01 -1 -1 33044 -1 -1 0 1 0 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 64496 1 1 1 2 0 1 2 3 3 9 -1 auto 24.5 MiB 0.00 2 2 3 0 3 0 63.0 MiB 0.00 0.00 2 1 1 30000 0 1489.46 165.495 0.00
5-
k4_N10_memSize16384_memData64.xml single_ff.v common 0.41 vpr 63.02 MiB -1 -1 0.08 26064 1 0.01 -1 -1 33064 -1 -1 1 2 0 0 success v8.0.0-12799-g50a644d78 release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-60-generic x86_64 2025-06-10T17:21:16 llavign1-OptiPlex-7070 /home/llavign1/Gits/vtr-clone/vtr_flow/tasks 64532 2 1 3 4 1 3 4 3 3 9 -1 auto 24.7 MiB 0.00 6 6 9 6 0 3 63.0 MiB 0.00 0.00 16 5 1 30000 30000 2550.78 283.420 0.00
2+
k4_N10_memSize16384_memData64.xml ch_intrinsics.v common 1.64 vpr 64.26 MiB -1 -1 0.18 21064 3 0.07 -1 -1 32716 -1 -1 72 99 1 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 65800 99 130 353 483 1 222 302 13 13 169 clb auto 24.4 MiB 0.03 1748.73 1183 124778 46879 23081 54818 64.3 MiB 0.15 0.00 22 2035 37 3.33e+06 2.28e+06 311708. 1844.43 0.53
3+
k4_N10_memSize16384_memData64.xml diffeq1.v common 2.81 vpr 67.24 MiB -1 -1 0.22 25288 23 0.24 -1 -1 33564 -1 -1 74 162 0 5 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 68852 162 96 1186 1127 1 667 337 13 13 169 clb auto 27.5 MiB 0.12 7906.16 4910 96441 37729 58151 561 67.2 MiB 0.12 0.00 50 9566 16 3.33e+06 2.67e+06 641417. 3795.37 1.17
4+
k4_N10_memSize16384_memData64.xml single_wire.v common 0.52 vpr 61.97 MiB -1 -1 0.06 19028 1 0.02 -1 -1 29568 -1 -1 0 1 0 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 63456 1 1 1 2 0 1 2 3 3 9 -1 auto 23.7 MiB 0.00 2 2 3 0 3 0 62.0 MiB 0.00 0.00 2 1 1 30000 0 1489.46 165.495 0.00
5+
k4_N10_memSize16384_memData64.xml single_ff.v common 0.49 vpr 62.35 MiB -1 -1 0.06 19528 1 0.02 -1 -1 29612 -1 -1 1 2 0 0 success v8.0.0-14178-g4818739e3-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-71-generic x86_64 2025-10-15T12:13:40 betzgrp-wintermute /home/gholam39/vpr/vtr-verilog-to-routing/vtr_flow 63844 2 1 3 4 1 3 4 3 3 9 -1 auto 23.7 MiB 0.00 6 6 9 3 3 3 62.3 MiB 0.00 0.00 26 15 1 30000 30000 4706.78 522.975 0.01

0 commit comments

Comments
 (0)