diff --git a/.travis/fpga_verilog_reg_test.sh b/.travis/fpga_verilog_reg_test.sh index 35086b65b..a8d1e8eb7 100755 --- a/.travis/fpga_verilog_reg_test.sh +++ b/.travis/fpga_verilog_reg_test.sh @@ -90,6 +90,12 @@ python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/power_gated_design/p echo -e "Testing Depopulated crossbar in local routing"; python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/depopulate_crossbar --debug --show_thread_logs +echo -e "Testing Fully connected output crossbar in local routing"; +python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/fully_connected_output_crossbar --debug --show_thread_logs + +echo -e "Testing no local routing architecture"; +python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/no_local_routing --debug --show_thread_logs + echo -e "Testing through channels in tileable routing"; python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/thru_channel/thru_narrow_tile --debug --show_thread_logs python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/thru_channel/thru_wide_tile --debug --show_thread_logs diff --git a/openfpga/src/base/openfpga_pb_pin_fixup.cpp b/openfpga/src/base/openfpga_pb_pin_fixup.cpp index 2a8686190..17c48fb5b 100644 --- a/openfpga/src/base/openfpga_pb_pin_fixup.cpp +++ b/openfpga/src/base/openfpga_pb_pin_fixup.cpp @@ -133,6 +133,7 @@ void update_cluster_pin_with_post_routing_results(const DeviceContext& device_ct if (routing_net_id == cluster_net_id) { continue; } + /* Add to net modification */ vpr_clustering_annotation.rename_net(blk_id, j, routing_net_id); diff --git a/openfpga/src/repack/repack.cpp b/openfpga/src/repack/repack.cpp index 39697f100..c3ab88f1c 100644 --- a/openfpga/src/repack/repack.cpp +++ b/openfpga/src/repack/repack.cpp @@ -38,6 +38,8 @@ static void rec_find_routed_sink_pb_graph_pins(const t_pb* pb, const t_pb_graph_pin* source_pb_pin, const AtomNetId& atom_net_id, + const VprDeviceAnnotation& device_annotation, + const std::map& pb_pin_mapped_nets, t_pb_graph_pin** pb_graph_pin_lookup_from_index, std::vector& sink_pb_pins) { @@ -69,7 +71,49 @@ void rec_find_routed_sink_pb_graph_pins(const t_pb* pb, if ( (true == sink_pb_pin->parent_node->is_root()) && (OUT_PORT == sink_pb_pin->port->type)) { - sink_pb_pins.push_back(sink_pb_pin); + /* Be careful!!! There is an inconsistency between pb_route and actual net mapping! + * The sink_pb_pin in the pb_route may not be the one we want + * due to net remapping in the routing stage + * If the net becomes invalid, we search all the fan-out of the source pb_pin + * and find one that is mapped to the net + */ + AtomNetId remapped_net = AtomNetId::INVALID(); + auto remapped_result = pb_pin_mapped_nets.find(sink_pb_pin); + if (remapped_result != pb_pin_mapped_nets.end()) { + remapped_net = remapped_result->second; + } + if (atom_net_id == remapped_net) { + sink_pb_pins.push_back(sink_pb_pin); + } else { + VTR_ASSERT_SAFE(atom_net_id != remapped_net); + bool found_actual_sink_pb_pin = false; + for (int iedge = 0; iedge < source_pb_pin->num_output_edges; ++iedge) { + /* Bypass the interconnect that does not belong to a physical mode */ + int parent_mode_index = source_pb_pin->output_edges[iedge]->interconnect->parent_mode_index; + VTR_ASSERT(parent_mode_index < sink_pb_pin->parent_node->pb_type->num_modes); + if (&(sink_pb_pin->parent_node->pb_type->modes[parent_mode_index]) + != device_annotation.physical_mode(sink_pb_pin->parent_node->pb_type)) { + continue; + } + for (int ipin = 0; ipin < source_pb_pin->output_edges[iedge]->num_output_pins; ++ipin) { + const t_pb_graph_pin* cand_sink_pb_pin = source_pb_pin->output_edges[iedge]->output_pins[ipin]; + auto cand_remapped_result = pb_pin_mapped_nets.find(cand_sink_pb_pin); + AtomNetId cand_sink_pb_pin_net = AtomNetId::INVALID(); + if (cand_remapped_result != pb_pin_mapped_nets.end()) { + cand_sink_pb_pin_net = cand_remapped_result->second; + } + if (atom_net_id == cand_sink_pb_pin_net) { + sink_pb_pins.push_back(const_cast(cand_sink_pb_pin)); + found_actual_sink_pb_pin = true; + break; + } + } + if (true == found_actual_sink_pb_pin) { + break; + } + } + VTR_ASSERT(true == found_actual_sink_pb_pin); + } continue; } @@ -78,7 +122,7 @@ void rec_find_routed_sink_pb_graph_pins(const t_pb* pb, } for (t_pb_graph_pin* sink_pb_pin : sink_pb_pins_to_search) { - rec_find_routed_sink_pb_graph_pins(pb, sink_pb_pin, atom_net_id, pb_graph_pin_lookup_from_index, sink_pb_pins); + rec_find_routed_sink_pb_graph_pins(pb, sink_pb_pin, atom_net_id, device_annotation, pb_pin_mapped_nets, pb_graph_pin_lookup_from_index, sink_pb_pins); } } @@ -90,10 +134,12 @@ static std::vector find_routed_pb_graph_pins_atom_net(const t_pb* pb, const t_pb_graph_pin* source_pb_pin, const AtomNetId& atom_net_id, + const VprDeviceAnnotation& device_annotation, + const std::map& pb_pin_mapped_nets, t_pb_graph_pin** pb_graph_pin_lookup_from_index) { std::vector sink_pb_pins; - rec_find_routed_sink_pb_graph_pins(pb, source_pb_pin, atom_net_id, pb_graph_pin_lookup_from_index, sink_pb_pins); + rec_find_routed_sink_pb_graph_pins(pb, source_pb_pin, atom_net_id, device_annotation, pb_pin_mapped_nets, pb_graph_pin_lookup_from_index, sink_pb_pins); return sink_pb_pins; } @@ -227,6 +273,34 @@ void add_lb_router_nets(LbRouter& lb_router, /* Build the fast look-up between pb_pin_id and pb_graph_pin pointer */ t_pb_graph_pin** pb_graph_pin_lookup_from_index = alloc_and_load_pb_graph_pin_lookup_from_index(lb_type); + /* Build a fast look-up between pb_graph_pin and atom net id which it is mapped to + * Note that, we only care the pb_graph_pin at the root pb_graph_node + * where pb_graph_pin may be remapped to a new net due to routing optimization + */ + std::map pb_pin_mapped_nets; + for (int j = 0; j < lb_type->pb_type->num_pins; j++) { + /* Find the net mapped to this pin in clustering results*/ + ClusterNetId cluster_net_id = clustering_ctx.clb_nlist.block_net(block_id, j); + /* Get the actual net id because it may be renamed during routing */ + if (true == clustering_annotation.is_net_renamed(block_id, j)) { + cluster_net_id = clustering_annotation.net(block_id, j); + } + + /* Bypass unmapped pins */ + if (ClusterNetId::INVALID() == cluster_net_id) { + continue; + } + + /* Get the source pb_graph pin and find the rr_node in logical block routing resource graph */ + const t_pb_graph_pin* pb_pin = get_pb_graph_node_pin_from_block_pin(block_id, j); + VTR_ASSERT(pb_pin->parent_node == pb->pb_graph_node); + + AtomNetId atom_net_id = atom_ctx.lookup.atom_net(cluster_net_id); + VTR_ASSERT(AtomNetId::INVALID() != atom_net_id); + + pb_pin_mapped_nets[pb_pin] = atom_net_id; + } + /* Cache all the source nodes and sinks node for each net * net_terminal[net][0] is the list of source nodes * net_terminal[net][1] is the list of sink nodes @@ -238,17 +312,6 @@ void add_lb_router_nets(LbRouter& lb_router, /* Find the source nodes for the nets mapped to inputs of a clustered block */ for (int j = 0; j < lb_type->pb_type->num_pins; j++) { - /* Find the net mapped to this pin in clustering results*/ - ClusterNetId cluster_net_id = clustering_ctx.clb_nlist.block_net(block_id, j); - /* Get the actual net id because it may be renamed during routing */ - if (true == clustering_annotation.is_net_renamed(block_id, j)) { - cluster_net_id = clustering_annotation.net(block_id, j); - } - /* Bypass unmapped pins */ - if (ClusterNetId::INVALID() == cluster_net_id) { - continue; - } - /* Get the source pb_graph pin and find the rr_node in logical block routing resource graph */ const t_pb_graph_pin* source_pb_pin = get_pb_graph_node_pin_from_block_pin(block_id, j); VTR_ASSERT(source_pb_pin->parent_node == pb->pb_graph_node); @@ -258,22 +321,25 @@ void add_lb_router_nets(LbRouter& lb_router, continue; } + /* Find the net mapped to this pin in clustering results*/ + AtomNetId atom_net_id = pb_pin_mapped_nets[source_pb_pin]; + /* Bypass unmapped pins */ + if (AtomNetId::INVALID() == atom_net_id) { + continue; + } + /* The outputs of pb_graph_node is INTERMEDIATE node in the routing resource graph, * they are all connected to a common source node */ LbRRNodeId source_lb_rr_node = lb_rr_graph.find_node(LB_INTERMEDIATE, source_pb_pin); VTR_ASSERT(true == lb_rr_graph.valid_node_id(source_lb_rr_node)); - AtomNetId atom_net_id = atom_ctx.lookup.atom_net(cluster_net_id); - VTR_ASSERT(AtomNetId::INVALID() != atom_net_id); - - int pb_route_index = find_pb_route_remapped_source_pb_pin(pb, source_pb_pin, atom_net_id); t_pb_graph_pin* packing_source_pb_pin = get_pb_graph_node_pin_from_block_pin(block_id, pb_route_index); VTR_ASSERT(nullptr != packing_source_pb_pin); /* Find all the sink pins in the pb_route, we walk through the input pins and find the pin */ - std::vector sink_pb_graph_pins = find_routed_pb_graph_pins_atom_net(pb, packing_source_pb_pin, atom_net_id, pb_graph_pin_lookup_from_index); + std::vector sink_pb_graph_pins = find_routed_pb_graph_pins_atom_net(pb, packing_source_pb_pin, atom_net_id, device_annotation, pb_pin_mapped_nets, pb_graph_pin_lookup_from_index); std::vector sink_lb_rr_nodes = find_lb_net_physical_sink_lb_rr_nodes(lb_rr_graph, sink_pb_graph_pins, device_annotation); VTR_ASSERT(sink_lb_rr_nodes.size() == sink_pb_graph_pins.size()); @@ -338,7 +404,7 @@ void add_lb_router_nets(LbRouter& lb_router, VTR_ASSERT(AtomNetId::INVALID() != atom_net_id); /* Find all the sink pins in the pb_route */ - std::vector sink_pb_graph_pins = find_routed_pb_graph_pins_atom_net(pb, source_pb_pin, atom_net_id, pb_graph_pin_lookup_from_index); + std::vector sink_pb_graph_pins = find_routed_pb_graph_pins_atom_net(pb, source_pb_pin, atom_net_id, device_annotation, pb_pin_mapped_nets, pb_graph_pin_lookup_from_index); std::vector sink_lb_rr_nodes = find_lb_net_physical_sink_lb_rr_nodes(lb_rr_graph, sink_pb_graph_pins, device_annotation); VTR_ASSERT(sink_lb_rr_nodes.size() == sink_pb_graph_pins.size()); diff --git a/openfpga_flow/openfpga_arch/k4_N4_no_local_routing_40nm_frame_openfpga.xml b/openfpga_flow/openfpga_arch/k4_N4_no_local_routing_40nm_frame_openfpga.xml new file mode 100644 index 000000000..9ba39b3ce --- /dev/null +++ b/openfpga_flow/openfpga_arch/k4_N4_no_local_routing_40nm_frame_openfpga.xml @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + + + + + 10e-12 5e-12 5e-12 + + + 10e-12 5e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/tasks/fpga_verilog/fully_connected_output_crossbar/config/task.conf b/openfpga_flow/tasks/fpga_verilog/fully_connected_output_crossbar/config/task.conf new file mode 100644 index 000000000..f8717b5fd --- /dev/null +++ b/openfpga_flow/tasks/fpga_verilog/fully_connected_output_crossbar/config/task.conf @@ -0,0 +1,38 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = true +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=vpr_blif + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/OpenFPGAShellScripts/example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_40nm_frame_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml +external_fabric_key_file= + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_full_output_crossbar_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif + +[SYNTHESIS_PARAM] +bench0_top = and2 +bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act +bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v +bench0_chan_width = 300 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test= +vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/tasks/fpga_verilog/no_local_routing/config/task.conf b/openfpga_flow/tasks/fpga_verilog/no_local_routing/config/task.conf new file mode 100644 index 000000000..c076b156c --- /dev/null +++ b/openfpga_flow/tasks/fpga_verilog/no_local_routing/config/task.conf @@ -0,0 +1,38 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = true +spice_output=false +verilog_output=true +timeout_each_job = 20*60 +fpga_flow=vpr_blif + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/OpenFPGAShellScripts/example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_N4_no_local_routing_40nm_frame_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml +external_fabric_key_file= + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_N4_tileable_no_local_routing_40nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif + +[SYNTHESIS_PARAM] +bench0_top = and2 +bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act +bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v +bench0_chan_width = 300 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test= +vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/vpr_arch/k4_N4_tileable_full_output_crossbar_40nm.xml b/openfpga_flow/vpr_arch/k4_N4_tileable_full_output_crossbar_40nm.xml new file mode 100644 index 000000000..16ceb2be2 --- /dev/null +++ b/openfpga_flow/vpr_arch/k4_N4_tileable_full_output_crossbar_40nm.xml @@ -0,0 +1,291 @@ + + + + + + + + + + + + + + + + + + + + + + + + io.outpad io.inpad + io.outpad io.inpad + io.outpad io.inpad + io.outpad io.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/vpr_arch/k4_N4_tileable_no_local_routing_40nm.xml b/openfpga_flow/vpr_arch/k4_N4_tileable_no_local_routing_40nm.xml new file mode 100644 index 000000000..06499b328 --- /dev/null +++ b/openfpga_flow/vpr_arch/k4_N4_tileable_no_local_routing_40nm.xml @@ -0,0 +1,286 @@ + + + + + + + + + + + + + + + + + + + + + + + + io.outpad io.inpad + io.outpad io.inpad + io.outpad io.inpad + io.outpad io.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +