diff --git a/vpr/src/route/rr_graph_generation/clb2clb_directs.cpp b/vpr/src/route/rr_graph_generation/clb2clb_directs.cpp index 905b47ff1f..45399efbeb 100644 --- a/vpr/src/route/rr_graph_generation/clb2clb_directs.cpp +++ b/vpr/src/route/rr_graph_generation/clb2clb_directs.cpp @@ -62,6 +62,8 @@ std::vector alloc_and_load_clb_to_clb_directs(const std::v clb_to_clb_directs[i].to_clb_type = physical_tile; tile_port = find_tile_port_by_name(physical_tile, port_name); + // Cache the destination port name as the pin index is not enough to identify if the destination subtile is the one we want + clb_to_clb_directs[i].to_port = port_name; if (start_pin_index == UNDEFINED) { VTR_ASSERT(start_pin_index == end_pin_index); diff --git a/vpr/src/route/rr_graph_generation/clb2clb_directs.h b/vpr/src/route/rr_graph_generation/clb2clb_directs.h index a40e24f5ed..f3f1f74a48 100644 --- a/vpr/src/route/rr_graph_generation/clb2clb_directs.h +++ b/vpr/src/route/rr_graph_generation/clb2clb_directs.h @@ -17,6 +17,7 @@ struct t_clb_to_clb_directs { int from_clb_pin_start_index; int from_clb_pin_end_index; t_physical_tile_type_ptr to_clb_type; + std::string to_port; int to_clb_pin_start_index; int to_clb_pin_end_index; int switch_index; //The switch type used by this direct connection diff --git a/vpr/src/route/rr_graph_generation/tileable_rr_graph/tileable_rr_graph_gsb.cpp b/vpr/src/route/rr_graph_generation/tileable_rr_graph/tileable_rr_graph_gsb.cpp index 8b5305b51e..50e25c39d6 100644 --- a/vpr/src/route/rr_graph_generation/tileable_rr_graph/tileable_rr_graph_gsb.cpp +++ b/vpr/src/route/rr_graph_generation/tileable_rr_graph/tileable_rr_graph_gsb.cpp @@ -1763,7 +1763,7 @@ void build_direct_connections_for_one_gsb(const RRGraphView& rr_graph, int to_subtile_cap = z + directs[i].sub_tile_offset; /* If the destination subtile is out of range, there is no qualified IPINs */ if (to_subtile_cap < 0 || to_subtile_cap >= to_grid_type->capacity) { - continue; + continue; } /* Iterate over all sub_tiles to get the sub_tile which the target_cap belongs to. */ const t_sub_tile* to_sub_tile = nullptr; @@ -1774,6 +1774,15 @@ void build_direct_connections_for_one_gsb(const RRGraphView& rr_graph, } } VTR_ASSERT(to_sub_tile != nullptr); + // Check if the to port is the one from the subtile, if not, pass as this is not the destination + bool port_match = false; + for (auto to_sub_tile_port : to_sub_tile->ports) { + if (std::string(to_sub_tile_port.name) == clb_to_clb_directs[i].to_port) { + port_match = true; + break; + } + } + if (!port_match) continue; if (relative_ipin >= to_sub_tile->num_phy_pins) continue; // If this block has capacity > 1 then the pins of z position > 0 are offset // by the number of pins per capacity instance @@ -1792,7 +1801,7 @@ void build_direct_connections_for_one_gsb(const RRGraphView& rr_graph, to_grid_coordinate.y() + to_grid_type->pin_height_offset[ipin], e_rr_type::IPIN, ipin, ipin_grid_side[0]); - /* add edges to the opin_node */ + // add edges to the opin_node */ if (!opin_node_id) { VTR_ASSERT(opin_node_id); } diff --git a/vtr_flow/arch/timing/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_mem16K_negz_nonLR_caravel_io_skywater130nm.xml b/vtr_flow/arch/timing/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_mem16K_negz_nonLR_caravel_io_skywater130nm.xml new file mode 100644 index 0000000000..5e2961c312 --- /dev/null +++ b/vtr_flow/arch/timing/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_mem16K_negz_nonLR_caravel_io_skywater130nm.xml @@ -0,0 +1,1027 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io_top.outpad io_top.inpad + + + + + + + + + + + + + + io_right.outpad io_right.inpad + + + + + + + + + + + + + + io_bottom.outpad io_bottom.inpad + + + + + + + + + + + + + + io_left.outpad io_left.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + clb.clk clb.reset + clb.reg_in clb.sc_in clb.cin clb.O[7:0] clb.I0 clb.I0i clb.I1 clb.I1i clb.I2 clb.I2i clb.I3 clb.I3i + clb.O[15:8] clb.I4 clb.I4i clb.I5 clb.I5i clb.I6 clb.I6i clb.I7 clb.I7i + clb.reg_out clb.sc_out clb.cout + + + + + + + + + + + + + + + + + + + + + + memory.waddr[0:2] memory.raddr[0:2] memory.d_in[0:7] memory.d_out[0:7] + memory.waddr[3:5] memory.raddr[3:5] memory.d_in[8:15] memory.d_out[8:15] + memory.waddr[6:7] memory.raddr[6:7] memory.d_in[16:23] memory.d_out[16:23] + memory.waddr[8:8] memory.raddr[8:8] memory.d_in[24:31] memory.d_out[24:31] + memory.clk memory.wen memory.ren + + + + + + + + + + + + + + + + + + mult_8.a[0:2] mult_8.b[0:2] mult_8.out[0:5] + mult_8.a[3:5] mult_8.b[3:5] mult_8.out[6:10] + mult_8.a[6:7] mult_8.b[6:7] mult_8.out[11:15] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 + 1 + + + + 1 1 1 + 1 1 + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 235e-12 + 235e-12 + 235e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vtr_flow/benchmarks/microbenchmarks/two_mult8_ram32.blif b/vtr_flow/benchmarks/microbenchmarks/two_mult8_ram32.blif new file mode 100644 index 0000000000..0f5890a368 --- /dev/null +++ b/vtr_flow/benchmarks/microbenchmarks/two_mult8_ram32.blif @@ -0,0 +1,13 @@ +# Generated by Yosys 0.56+30 (git sha1 407d42511, g++ 11.4.0-1ubuntu1~22.04 -fPIC -O3) + +.model two_mult8_ram32 +.inputs a0[0] a0[1] a0[2] a0[3] a0[4] a0[5] a0[6] a0[7] b0[0] b0[1] b0[2] b0[3] b0[4] b0[5] b0[6] b0[7] a1[0] a1[1] a1[2] a1[3] a1[4] a1[5] a1[6] a1[7] b1[0] b1[1] b1[2] b1[3] b1[4] b1[5] b1[6] b1[7] clk waddr[0] waddr[1] waddr[2] waddr[3] waddr[4] waddr[5] waddr[6] waddr[7] waddr[8] raddr[0] raddr[1] raddr[2] raddr[3] raddr[4] raddr[5] raddr[6] raddr[7] raddr[8] +.outputs out[0] out[1] out[2] out[3] out[4] out[5] out[6] out[7] out[8] out[9] out[10] out[11] out[12] out[13] out[14] out[15] out[16] out[17] out[18] out[19] out[20] out[21] out[22] out[23] out[24] out[25] out[26] out[27] out[28] out[29] out[30] out[31] +.names $false +.names $true +1 +.names $undef +.subckt mult_8 A[7]=a0[0] A[6]=a0[1] A[5]=a0[2] A[4]=a0[3] A[3]=a0[4] A[2]=a0[5] A[1]=a0[6] A[0]=a0[7] B[7]=b0[0] B[6]=b0[1] B[5]=b0[2] B[4]=b0[3] B[3]=b0[4] B[2]=b0[5] B[1]=b0[6] B[0]=b0[7] Y[15]=mult_y0[0] Y[14]=mult_y0[1] Y[13]=mult_y0[2] Y[12]=mult_y0[3] Y[11]=mult_y0[4] Y[10]=mult_y0[5] Y[9]=mult_y0[6] Y[8]=mult_y0[7] Y[7]=mult_y0[8] Y[6]=mult_y0[9] Y[5]=mult_y0[10] Y[4]=mult_y0[11] Y[3]=mult_y0[12] Y[2]=mult_y0[13] Y[1]=mult_y0[14] Y[0]=mult_y0[15] +.subckt mult_8 A[7]=a1[0] A[6]=a1[1] A[5]=a1[2] A[4]=a1[3] A[3]=a1[4] A[2]=a1[5] A[1]=a1[6] A[0]=a1[7] B[7]=b1[0] B[6]=b1[1] B[5]=b1[2] B[4]=b1[3] B[3]=b1[4] B[2]=b1[5] B[1]=b1[6] B[0]=b1[7] Y[15]=mult_y1[0] Y[14]=mult_y1[1] Y[13]=mult_y1[2] Y[12]=mult_y1[3] Y[11]=mult_y1[4] Y[10]=mult_y1[5] Y[9]=mult_y1[6] Y[8]=mult_y1[7] Y[7]=mult_y1[8] Y[6]=mult_y1[9] Y[5]=mult_y1[10] Y[4]=mult_y1[11] Y[3]=mult_y1[12] Y[2]=mult_y1[13] Y[1]=mult_y1[14] Y[0]=mult_y1[15] +.subckt dpram_512x32 clk=clk d_in[31]=mult_y0[0] d_in[30]=mult_y0[1] d_in[29]=mult_y0[2] d_in[28]=mult_y0[3] d_in[27]=mult_y0[4] d_in[26]=mult_y0[5] d_in[25]=mult_y0[6] d_in[24]=mult_y0[7] d_in[23]=mult_y0[8] d_in[22]=mult_y0[9] d_in[21]=mult_y0[10] d_in[20]=mult_y0[11] d_in[19]=mult_y0[12] d_in[18]=mult_y0[13] d_in[17]=mult_y0[14] d_in[16]=mult_y0[15] d_in[15]=mult_y1[0] d_in[14]=mult_y1[1] d_in[13]=mult_y1[2] d_in[12]=mult_y1[3] d_in[11]=mult_y1[4] d_in[10]=mult_y1[5] d_in[9]=mult_y1[6] d_in[8]=mult_y1[7] d_in[7]=mult_y1[8] d_in[6]=mult_y1[9] d_in[5]=mult_y1[10] d_in[4]=mult_y1[11] d_in[3]=mult_y1[12] d_in[2]=mult_y1[13] d_in[1]=mult_y1[14] d_in[0]=mult_y1[15] d_out[31]=out[0] d_out[30]=out[1] d_out[29]=out[2] d_out[28]=out[3] d_out[27]=out[4] d_out[26]=out[5] d_out[25]=out[6] d_out[24]=out[7] d_out[23]=out[8] d_out[22]=out[9] d_out[21]=out[10] d_out[20]=out[11] d_out[19]=out[12] d_out[18]=out[13] d_out[17]=out[14] d_out[16]=out[15] d_out[15]=out[16] d_out[14]=out[17] d_out[13]=out[18] d_out[12]=out[19] d_out[11]=out[20] d_out[10]=out[21] d_out[9]=out[22] d_out[8]=out[23] d_out[7]=out[24] d_out[6]=out[25] d_out[5]=out[26] d_out[4]=out[27] d_out[3]=out[28] d_out[2]=out[29] d_out[1]=out[30] d_out[0]=out[31] raddr[8]=raddr[0] raddr[7]=raddr[1] raddr[6]=raddr[2] raddr[5]=raddr[3] raddr[4]=raddr[4] raddr[3]=raddr[5] raddr[2]=raddr[6] raddr[1]=raddr[7] raddr[0]=raddr[8] ren=$true waddr[8]=waddr[0] waddr[7]=waddr[1] waddr[6]=waddr[2] waddr[5]=waddr[3] waddr[4]=waddr[4] waddr[3]=waddr[5] waddr[2]=waddr[6] waddr[1]=waddr[7] waddr[0]=waddr[8] wen=$true +.end diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/config.txt new file mode 100644 index 0000000000..0127468a1e --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/config.txt @@ -0,0 +1,27 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/microbenchmarks + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=two_mult8_ram32.blif + +# Add architectures to list to sweep +arch_list_add=k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_mem16K_negz_nonLR_caravel_io_skywater130nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params=-starting_stage vpr -track_memory_usage diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/golden_results.txt new file mode 100644 index 0000000000..f14bbee11c --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_mem16K_negz_nonLR_caravel_io_skywater130nm.xml two_mult8_ram32.blif common 1.03 vpr 46.25 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 51 1 -1 success v8.0.0-14246-g8605c8d19-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-4.4.0-19041-Microsoft x86_64 2025-10-27T16:32:35 LAPTOP-CVNHOGSN /home/xtang/github/vtr-verilog-to-routing/vtr_flow/tasks 47364 51 32 116 87 1 116 87 5 5 25 io auto 7.6 MiB 0.01 468 414 15639 7675 7110 854 46.3 MiB 0.03 0.00 4.81578 4.59694 -212.268 -4.59694 4.59694 0.01 8.69e-05 7.17e-05 0.0069174 0.0056358 -1 -1 -1 -1 56 703 23 773258 1.24189e+06 64384.8 2575.39 0.38 0.0484148 0.042138 2510 10345 -1 601 13 344 344 46168 27975 4.97734 4.97734 -235.832 -4.97734 -1.68869 -0.257244 78579.3 3143.17 0.00 0.01 -1 -1 -1 0.00 0.0032542 0.0029231 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index c7b9883e1b..b068cedd5e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -116,6 +116,7 @@ regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_super_subtile regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset +regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_subtile_neg_z_offset2 regression_tests/vtr_reg_strong/strong_tileable_rr_graph_direct_super_subtile_neg_z_offset regression_tests/vtr_reg_strong/strong_direct_subtile regression_tests/vtr_reg_strong/strong_direct_super_subtile