OpenFPGA/openfpga/src/base/openfpga_link_arch.cpp

307 lines
13 KiB
C++
Raw Normal View History

/********************************************************************
* This file includes functions to read an OpenFPGA architecture file
* which are built on the libarchopenfpga library
*******************************************************************/
#include <algorithm>
#include <cmath>
#include <iterator>
#include <unordered_map>
#include <vector>
/* Headers from vtrutil library */
#include "vtr_time.h"
#include "vtr_assert.h"
#include "vtr_log.h"
/* Headers from vpr library */
#include "timing_info.h"
#include "AnalysisDelayCalculator.h"
#include "net_delay.h"
#include "read_activity.h"
#include "vpr_device_annotation.h"
#include "pb_type_utils.h"
#include "annotate_pb_types.h"
#include "annotate_pb_graph.h"
#include "annotate_routing.h"
#include "annotate_rr_graph.h"
2020-02-12 15:58:23 -06:00
#include "mux_library_builder.h"
2020-02-14 23:21:32 -06:00
#include "build_tile_direct.h"
#include "annotate_placement.h"
#include "openfpga_link_arch.h"
/* Include global variables of VPR */
#include "globals.h"
/* begin namespace openfpga */
namespace openfpga {
2020-02-12 15:58:23 -06:00
/********************************************************************
 * Check whether the routing resource graph generated by VPR
 * is supported by OpenFPGA
 * - Currently only uni-directional routing architectures are supported,
 *   i.e., every routing track (CHANX/CHANY node) must have a direction
 * - Returns true when no bi-directional routing track is found;
 *   otherwise an error is logged and false is returned
 *******************************************************************/
static
bool is_vpr_rr_graph_supported(const RRGraph& rr_graph) {
  for (const RRNodeId& rr_node : rr_graph.nodes()) {
    /* Only routing tracks are inspected; any other node type is ignored */
    const bool is_routing_track = (CHANX == rr_graph.node_type(rr_node))
                               || (CHANY == rr_graph.node_type(rr_node));

    /* The first bi-directional track is enough to reject the graph */
    if (is_routing_track && (BI_DIRECTION == rr_graph.node_direction(rr_node))) {
      VTR_LOG_ERROR("Routing resource graph is bi-directional. OpenFPGA currently supports uni-directional routing architecture only.\n");
      return false;
    }
  }

  return true;
}
/********************************************************************
* Find the number of clock cycles in simulation based on the average signal density
*******************************************************************/
static
size_t recommend_num_sim_clock_cycle(const AtomContext& atom_ctx,
const std::unordered_map<AtomNetId, t_net_power>& net_activity,
const float& sim_window_size) {
size_t recmd_num_sim_clock_cycle = 0;
float avg_density = 0.;
size_t net_cnt = 0;
float weighted_avg_density = 0.;
size_t weighted_net_cnt = 0;
/* get the average density of all the nets */
for (const AtomNetId& atom_net : atom_ctx.nlist.nets()) {
/* Skip the nets without any activity annotation */
if (0 == net_activity.count(atom_net)) {
continue;
}
/* Only care non-zero density nets */
if (0. == net_activity.at(atom_net).density) {
continue;
}
avg_density += net_activity.at(atom_net).density;
net_cnt++;
/* Consider the weight of fan-out */
size_t net_weight;
if (0 == std::distance(atom_ctx.nlist.net_sinks(atom_net).begin(), atom_ctx.nlist.net_sinks(atom_net).end())) {
net_weight = 1;
} else {
VTR_ASSERT(0 < std::distance(atom_ctx.nlist.net_sinks(atom_net).begin(), atom_ctx.nlist.net_sinks(atom_net).end()));
net_weight = std::distance(atom_ctx.nlist.net_sinks(atom_net).begin(), atom_ctx.nlist.net_sinks(atom_net).end());
}
weighted_avg_density += net_activity.at(atom_net).density* net_weight;
weighted_net_cnt += net_weight;
}
avg_density = avg_density / net_cnt;
weighted_avg_density = weighted_avg_density / weighted_net_cnt;
/* Sort the net density */
std::vector<float> net_densities;
net_densities.reserve(net_cnt);
for (const AtomNetId& atom_net : atom_ctx.nlist.nets()) {
/* Skip the nets without any activity annotation */
if (0 == net_activity.count(atom_net)) {
continue;
}
/* Only care non-zero density nets */
if (0. == net_activity.at(atom_net).density) {
continue;
}
net_densities.push_back(net_activity.at(atom_net).density);
}
std::sort(net_densities.begin(), net_densities.end());
/* Get the median */
float median_density = 0.;
/* check for even case */
if (net_cnt % 2 != 0) {
median_density = net_densities[size_t(net_cnt / 2)];
} else {
median_density = 0.5 * (net_densities[size_t((net_cnt - 1) / 2)] + net_densities[size_t((net_cnt - 1) / 2)]);
}
/* It may be more reasonable to use median
* But, if median density is 0, we use average density
*/
if ((0. == median_density) && (0. == avg_density)) {
recmd_num_sim_clock_cycle = 1;
VTR_LOG_WARN("All the signal density is zero!\nNumber of clock cycles in simulations are set to be %ld!\n",
recmd_num_sim_clock_cycle);
} else if (0. == avg_density) {
recmd_num_sim_clock_cycle = (int)round(1 / median_density);
} else if (0. == median_density) {
recmd_num_sim_clock_cycle = (int)round(1 / avg_density);
} else {
/* add a sim window size to balance the weight of average density and median density
* In practice, we find that there could be huge difference between avereage and median values
* For a reasonable number of simulation clock cycles, we do this window size.
*/
recmd_num_sim_clock_cycle = (int)round(1 / (sim_window_size * avg_density + (1 - sim_window_size) * median_density ));
}
VTR_ASSERT(0 < recmd_num_sim_clock_cycle);
VTR_LOG("Average net density: %.2f\n", avg_density);
VTR_LOG("Median net density: %.2f\n", median_density);
VTR_LOG("Average net density after weighting: %.2f\n", weighted_avg_density);
VTR_LOG("Window size set for Simulation: %.2f\n", sim_window_size);
VTR_LOG("Net density after Window size : %.2f\n",
(sim_window_size * avg_density + (1 - sim_window_size) * median_density));
VTR_LOG("Recommend no. of clock cycles: %ld\n", recmd_num_sim_clock_cycle);
return recmd_num_sim_clock_cycle;
}
/********************************************************************
 * Annotate simulation setting based on VPR results
 * - If the operating clock frequency is set to follow the vpr timing results
 *   (i.e., the frequency in the simulation setting is 0),
 *   we will set a new operating clock frequency here
 * - If the number of clock cycles in simulation is set to be automatically
 *   determined (i.e., the number in the simulation setting is 0),
 *   we will infer the number based on the signal density of the nets
 *
 * Arguments:
 * - atom_ctx: atom netlist and lookup used by the timing analysis
 *   and by the inference of the number of clock cycles
 * - net_activity: activity annotation (signal density) of each net
 * - sim_setting: simulation setting which is updated in place
 *******************************************************************/
static
void annotate_simulation_setting(const AtomContext& atom_ctx,
                                 const std::unordered_map<AtomNetId, t_net_power>& net_activity,
                                 SimulationSetting& sim_setting) {
  /* Find if the operating frequency is bound to vpr results */
  if (0. == sim_setting.operating_clock_frequency()) {
    VTR_LOG("User specified the operating clock frequency to use VPR results\n");

    /* Run timing analysis and collect critical path delay
     * This code is copied from function vpr_analysis() in vpr_api.h
     * Should keep updated to latest VPR code base
     * Note:
     * - MUST mention in documentation that VPR should be run in timing enabled mode
     */
    vtr::vector<ClusterNetId, float*> net_delay;
    vtr::t_chunk net_delay_ch;

    /* Load the net delays
     * NOTE(review): net_delay is allocated here and never released in this function
     *               - confirm whether a matching release call is required
     */
    net_delay = alloc_net_delay(&net_delay_ch);
    load_net_delay_from_routing(net_delay);

    /* Do final timing analysis */
    auto analysis_delay_calc = std::make_shared<AnalysisDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay);
    auto timing_info = make_setup_hold_timing_info(analysis_delay_calc);
    timing_info->update();

    /* Get critical path delay, enlarged by the user-defined slack. Update simulation settings */
    float T_crit = timing_info->least_slack_critical_path().delay() * (1. + sim_setting.operating_clock_frequency_slack());
    sim_setting.set_operating_clock_frequency(1 / T_crit);

    /* The frequency (1 / T_crit) is reported in [MHz] below by dividing by 1e6,
     * so T_crit is in seconds: multiply by 1e9 to report it in [ns].
     * A literal percent sign must be written as '%%' in the format string
     */
    VTR_LOG("Use VPR critical path delay %g [ns] with a %g [%%] slack in OpenFPGA.\n",
            T_crit * 1e9, sim_setting.operating_clock_frequency_slack() * 100);
  }

  VTR_LOG("Will apply operating clock frequency %g [MHz] to simulations\n",
          sim_setting.operating_clock_frequency() / 1e6);

  if (0. == sim_setting.num_clock_cycles()) {
    /* Find the number of clock cycles to be used in simulation by average over the signal activity */
    VTR_LOG("User specified the number of operating clock cycles to be inferred from signal activities\n");

    /* Average and median densities are weighted equally (window size = 0.5) */
    size_t num_clock_cycles = recommend_num_sim_clock_cycle(atom_ctx,
                                                            net_activity,
                                                            0.5);
    sim_setting.set_num_clock_cycles(num_clock_cycles);

    VTR_LOG("Will apply %lu operating clock cycles to simulations\n",
            sim_setting.num_clock_cycles());
  }
}
/********************************************************************
 * Top-level function to link openfpga architecture to VPR, including:
 * - physical pb_type
 * - mode selection bits for pb_type and pb interconnect
 * - circuit models for pb_type and pb interconnect
 * - physical pb_graph nodes and pb_graph pins
 * - circuit models for global routing architecture
 * - net mapping of routing resource nodes
 * - multiplexer library, tile direct connections and placement results
 * - net activity and simulation settings
 *
 * Note:
 * - If the routing resource graph of VPR is not supported,
 *   this function returns early and the remaining steps are skipped
 *******************************************************************/
void link_arch(OpenfpgaContext& openfpga_ctx,
               const Command& cmd, const CommandContext& cmd_context) {

  vtr::ScopedStartFinishTimer timer("Link OpenFPGA architecture to VPR architecture");

  CommandOptionId opt_activity_file = cmd.option("activity_file");
  CommandOptionId opt_verbose = cmd.option("verbose");

  /* Shorthands shared by all the annotation steps below */
  const bool verbose = cmd_context.option_enable(cmd, opt_verbose);
  const auto& device_ctx = g_vpr_ctx.device();

  /* Step 1: annotate pb_type graphs
   * - physical pb_type
   * - mode selection bits for pb_type and pb interconnect
   * - circuit models for pb_type and pb interconnect
   */
  annotate_pb_types(device_ctx, openfpga_ctx.arch(),
                    openfpga_ctx.mutable_vpr_device_annotation(),
                    verbose);

  /* Step 2: annotate pb_graph_nodes
   * - Give unique index to each node in the same type
   * - Bind operating pb_graph_node to their physical pb_graph_node
   * - Bind pins from operating pb_graph_node to their physical pb_graph_node pins
   */
  annotate_pb_graph(device_ctx,
                    openfpga_ctx.mutable_vpr_device_annotation(),
                    verbose);

  /* Step 3: annotate routing architecture to circuit library */
  annotate_rr_graph_circuit_models(device_ctx,
                                   openfpga_ctx.arch(),
                                   openfpga_ctx.mutable_vpr_device_annotation(),
                                   verbose);

  /* Step 4: annotate net mapping to each rr_node */
  openfpga_ctx.mutable_vpr_routing_annotation().init(device_ctx.rr_graph);
  annotate_rr_node_nets(device_ctx, g_vpr_ctx.clustering(), g_vpr_ctx.routing(),
                        openfpga_ctx.mutable_vpr_routing_annotation(),
                        verbose);

  /* Step 5: build the routing graph annotation
   * - RRGSB
   * - DeviceRRGSB
   * This is only possible when the routing resource graph is supported
   */
  if (!is_vpr_rr_graph_supported(device_ctx.rr_graph)) {
    return;
  }

  annotate_device_rr_gsb(device_ctx,
                         openfpga_ctx.mutable_device_rr_gsb(),
                         verbose);

  /* Step 6: build multiplexer library (the context is accessed read-only) */
  const OpenfpgaContext& const_openfpga_ctx = openfpga_ctx;
  openfpga_ctx.mutable_mux_lib() = build_device_mux_library(device_ctx,
                                                            const_openfpga_ctx);

  /* Step 7: build tile direct annotation */
  openfpga_ctx.mutable_tile_direct() = build_device_tile_direct(device_ctx,
                                                                openfpga_ctx.arch().arch_direct);

  /* Step 8: annotate placement results */
  annotate_mapped_blocks(device_ctx,
                         g_vpr_ctx.clustering(),
                         g_vpr_ctx.placement(),
                         openfpga_ctx.mutable_vpr_placement_annotation());

  /* Step 9: read activity file, which is mandatory in the following flow-run settings
   * - When users specify that number of clock cycles
   *   should be inferred from FPGA implementation
   * - When FPGA-SPICE is enabled
   */
  const auto& activity_file_name = cmd_context.option_value(cmd, opt_activity_file);
  openfpga_ctx.mutable_net_activity() = read_activity(g_vpr_ctx.atom().nlist,
                                                      activity_file_name.c_str());

  /* Step 10: annotate the simulation setting
   * TODO: Annotate the number of clock cycles and clock frequency by following VPR results
   *       We SHOULD create a new simulation setting for OpenFPGA use only
   *       Avoid overwriting the raw data obtained when parsing!!!
   */
  annotate_simulation_setting(g_vpr_ctx.atom(),
                              openfpga_ctx.net_activity(),
                              openfpga_ctx.mutable_arch().sim_setting);
}
} /* end namespace openfpga */