472 lines
15 KiB
C++
472 lines
15 KiB
C++
/*********************************************************************
|
|
* The following code is part of the power modelling feature of VTR.
|
|
*
|
|
* For support:
|
|
* http://code.google.com/p/vtr-verilog-to-routing/wiki/Power
|
|
*
|
|
* or email:
|
|
* vtr.power.estimation@gmail.com
|
|
*
|
|
* If you are using power estimation for your research please cite:
|
|
*
|
|
* Jeffrey Goeders and Steven Wilton. VersaPower: Power Estimation
|
|
* for Diverse FPGA Architectures. In International Conference on
|
|
* Field Programmable Technology, 2012.
|
|
*
|
|
********************************************************************/
|
|
|
|
/**
|
|
* This file provides utility functions used by power estimation.
|
|
*/
|
|
|
|
/************************* INCLUDES *********************************/
|
|
#include <cstring>
|
|
#include <cmath>
|
|
#include <map>
|
|
|
|
#include "vtr_assert.h"
|
|
#include "vtr_memory.h"
|
|
|
|
#include "power_util.h"
|
|
#include "globals.h"
|
|
#include "atom_netlist.h"
|
|
#include "atom_netlist_utils.h"
|
|
|
|
/************************* GLOBALS **********************************/
|
|
|
|
/************************* FUNCTION DECLARATIONS*********************/
|
|
static void log_msg(t_log* log_ptr, const char* msg);
|
|
static void init_mux_arch_default(t_mux_arch* mux_arch, int levels, int num_inputs, float transistor_size);
|
|
static void alloc_and_load_mux_graph_recursive(t_mux_node* node,
|
|
int num_primary_inputs,
|
|
int level,
|
|
int starting_pin_idx);
|
|
static t_mux_node* alloc_and_load_mux_graph(int num_inputs, int levels);
|
|
|
|
/************************* FUNCTION DEFINITIONS *********************/
|
|
/**
 * Clears a power usage record by setting both of its components
 * (dynamic and leakage) to zero.
 *  - power_usage: The record to reset (must not be null)
 */
void power_zero_usage(t_power_usage* power_usage) {
    power_usage->leakage = 0.;
    power_usage->dynamic = 0.;
}
|
|
|
|
/**
 * Accumulates one power usage record into another, component by component.
 *  - dest: The record that receives the sum (modified in place)
 *  - src: The record whose dynamic and leakage values are added to dest
 */
void power_add_usage(t_power_usage* dest, const t_power_usage* src) {
    dest->leakage += src->leakage;
    dest->dynamic += src->dynamic;
}
|
|
|
|
/**
 * Multiplies both components of a power usage record by the same factor.
 *  - power_usage: The record to scale (modified in place)
 *  - scale_factor: The multiplier applied to the dynamic and leakage values
 */
void power_scale_usage(t_power_usage* power_usage, float scale_factor) {
    power_usage->leakage *= scale_factor;
    power_usage->dynamic *= scale_factor;
}
|
|
|
|
/**
 * Returns the total of a power usage record, i.e. dynamic + leakage.
 *  - power_usage: The record to total (not modified)
 */
float power_sum_usage(t_power_usage* power_usage) {
    const float total = power_usage->dynamic + power_usage->leakage;
    return total;
}
|
|
|
|
/**
 * Returns the dynamic component of a power usage record divided by its
 * total (dynamic + leakage). The value is a ratio; it is not multiplied
 * by 100 here.
 *  - power_usage: The record to examine (not modified)
 *
 * NOTE: no guard exists for a zero total, in which case the floating-point
 * division does not produce a finite ratio.
 */
float power_perc_dynamic(t_power_usage* power_usage) {
    const float total = power_sum_usage(power_usage);
    return power_usage->dynamic / total;
}
|
|
|
|
/**
 * Records a message in one of the power output logs.
 *  - log_type: Index of the log (within the power context's output logs)
 *  - msg: The message text; it is handed to log_msg(), which copies it
 */
void power_log_msg(e_power_log_type log_type, const char* msg) {
    auto& power_ctx = g_vpr_ctx.power();

    t_log* target_log = &power_ctx.output->logs[log_type];
    log_msg(target_log, msg);
}
|
|
|
|
/**
 * Returns a printable name for a transistor type.
 *  - type: The transistor type
 * Returns "NMOS" or "PMOS" for those types, and "Unknown" for anything else.
 * The returned pointer refers to a string literal and must not be freed.
 */
const char* transistor_type_name(e_tx_type type) {
    switch (type) {
        case NMOS:
            return "NMOS";
        case PMOS:
            return "PMOS";
        default:
            return "Unknown";
    }
}
|
|
|
|
/**
 * Returns the transition density recorded for the atom net routed through
 * a pb graph pin of a clustered block.
 *  - pb: Only tested for null; a null pb yields a density of 0
 *  - pin: The pb graph pin; its pin_count_in_cluster is the routing key
 *  - iblk: The clustered block whose pb_route table is consulted
 * Returns 0 when pb is null or when the pin has no pb_route entry.
 */
float pin_dens(t_pb* pb, t_pb_graph_pin* pin, ClusterBlockId iblk) {
    auto& cluster_ctx = g_vpr_ctx.clustering();
    auto& power_ctx = g_vpr_ctx.mutable_power();

    if (!pb) {
        return 0.;
    }

    /* Pins without a routing entry carry no net, so report zero density */
    if (cluster_ctx.clb_nlist.block_pb(iblk)->pb_route.count(pin->pin_count_in_cluster) == 0) {
        return 0.;
    }

    AtomNetId routed_net = cluster_ctx.clb_nlist.block_pb(iblk)->pb_route[pin->pin_count_in_cluster].atom_net_id;
    return power_ctx.atom_net_power[routed_net].density;
}
|
|
|
|
/**
 * Returns the signal probability recorded for the atom net routed through
 * a pb graph pin of a clustered block.
 *  - pb: Only tested for null; a null pb yields the default probability
 *  - pin: The pb graph pin; its pin_count_in_cluster is the routing key
 *  - iblk: The clustered block whose pb_route table is consulted
 * Returns 1 when pb is null or when the pin has no pb_route entry
 * (a pull-up is assumed on unused interconnect).
 */
float pin_prob(t_pb* pb, t_pb_graph_pin* pin, ClusterBlockId iblk) {
    auto& cluster_ctx = g_vpr_ctx.clustering();
    auto& power_ctx = g_vpr_ctx.mutable_power();

    /* Assumed pull-up on unused interconnect */
    if (!pb) {
        return 1.;
    }
    if (cluster_ctx.clb_nlist.block_pb(iblk)->pb_route.count(pin->pin_count_in_cluster) == 0) {
        return 1.;
    }

    AtomNetId routed_net = cluster_ctx.clb_nlist.block_pb(iblk)->pb_route[pin->pin_count_in_cluster].atom_net_id;
    return power_ctx.atom_net_power[routed_net].probability;
}
|
|
|
|
/**
|
|
* This function determines the values of the selectors in a static mux, based
|
|
* on the routing information.
|
|
* - selector_values: (Return values) selected index at each mux level
|
|
* - mux_node:
|
|
* - selected_input_pin: The input index to the multi-level mux that is chosen
|
|
*/
|
|
bool mux_find_selector_values(int* selector_values, t_mux_node* mux_node, int selected_input_pin) {
|
|
if (mux_node->level == 0) {
|
|
if ((selected_input_pin >= mux_node->starting_pin_idx)
|
|
&& (selected_input_pin
|
|
<= (mux_node->starting_pin_idx + mux_node->num_inputs))) {
|
|
selector_values[mux_node->level] = selected_input_pin
|
|
- mux_node->starting_pin_idx;
|
|
return true;
|
|
}
|
|
} else {
|
|
int input_idx;
|
|
for (input_idx = 0; input_idx < mux_node->num_inputs; input_idx++) {
|
|
if (mux_find_selector_values(selector_values,
|
|
&mux_node->children[input_idx], selected_input_pin)) {
|
|
selector_values[mux_node->level] = input_idx;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
 * Appends a copy of a message to a log, unless an identical message is
 * already stored.
 *  - log_ptr: The log to append to; its message array is grown by one slot
 *  - msg: The message text (copied into newly allocated storage)
 *
 * At most MAX_LOGS messages are stored. The next attempt after that stores
 * a "LOG IS FULL" marker instead of the message, and all later attempts
 * are ignored.
 */
static void log_msg(t_log* log_ptr, const char* msg) {
    /* Identical messages are only recorded once */
    for (int i = 0; i < log_ptr->num_messages; ++i) {
        if (0 == strcmp(log_ptr->messages[i], msg)) {
            return;
        }
    }

    /* Can't add any more messages (the marker has already been stored) */
    if (log_ptr->num_messages > MAX_LOGS) {
        return;
    }

    /* Grow the message array by one slot */
    const auto slot = log_ptr->num_messages;
    log_ptr->num_messages = slot + 1;
    log_ptr->messages = (char**)vtr::realloc(log_ptr->messages,
                                             log_ptr->num_messages * sizeof(char*));

    /* The slot just past MAX_LOGS receives the marker rather than the message */
    const char* text = msg;
    if (log_ptr->num_messages == (MAX_LOGS + 1)) {
        text = "\n***LOG IS FULL***\n";
    }

    const size_t len = strlen(text) + 1;
    log_ptr->messages[slot] = (char*)vtr::calloc(len, sizeof(char));
    strncpy(log_ptr->messages[slot], text, len);
}
|
|
|
|
/**
 * Calculates the number of buffer stages required, to achieve a given buffer fanout
 *  - final_stage_size: Size of the final inverter in the buffer, relative to a min size
 *  - desired_stage_effort: The desired gain between stages, typically 4
 *
 * Returns 1 when the final stage is no larger than minimum size, 2 when it is
 * smaller than one stage effort, and otherwise
 * floor(log(final_stage_size) / log(desired_stage_effort) + 1).
 */
int power_calc_buffer_num_stages(float final_stage_size,
                                 float desired_stage_effort) {
    if (final_stage_size <= 1.0) {
        return 1;
    }

    if (final_stage_size < desired_stage_effort) {
        return 2;
    }

    /* We always round down.
     * Perhaps N+1 would be closer to the desired stage effort, but the delay savings
     * would likely not be worth the extra power/area
     */
    return (int)(log(final_stage_size) / log(desired_stage_effort) + 1);
}
|
|
|
|
/**
 * Calculates the required effort of each stage of a buffer
 *  - N: The number of stages of the buffer
 *  - final_stage_size: Size of the final inverter in the buffer, relative to a min size
 *
 * Returns final_stage_size^(1 / (N - 1)) when N > 1, and 1.0 otherwise.
 */
float calc_buffer_stage_effort(int N, float final_stage_size) {
    if (N <= 1) {
        return 1.0;
    }

    const double exponent = 1.0 / ((double)N - 1);
    return pow((double)final_stage_size, exponent);
}
|
|
|
|
/**
|
|
* This functions returns the LUT SRAM values from the given logic terms
|
|
* - LUT_size: The number of LUT inputs
|
|
* - truth_table: The logic terms saved from the BLIF file
|
|
*/
|
|
char* alloc_SRAM_values_from_truth_table(int LUT_size,
|
|
const AtomNetlist::TruthTable& truth_table) {
|
|
int num_SRAM_bits = 1 << LUT_size;
|
|
|
|
//SRAM value stored as a string of '0' and '1' characters
|
|
// Initialize to all zeros
|
|
char* SRAM_values = (char*)vtr::calloc(num_SRAM_bits + 1, sizeof(char));
|
|
SRAM_values[num_SRAM_bits] = '\0';
|
|
|
|
if (truth_table.empty()) {
|
|
for (int i = 0; i < num_SRAM_bits; i++) {
|
|
SRAM_values[i] = '1';
|
|
}
|
|
return SRAM_values;
|
|
}
|
|
|
|
/* Check if this is an unconnected node - hopefully these will be
|
|
* ignored by VPR in the future
|
|
*/
|
|
if (truth_table.size() == 1) {
|
|
//Single row check to see if a constant node
|
|
if (truth_table[0].size() == 1) {
|
|
if (truth_table[0][0] == vtr::LogicValue::TRUE) {
|
|
//Mark all the SRAM values as ON
|
|
for (int i = 0; i < num_SRAM_bits; i++) {
|
|
SRAM_values[i] = '1';
|
|
}
|
|
return SRAM_values;
|
|
} else {
|
|
VTR_ASSERT(truth_table[0][0] == vtr::LogicValue::FALSE);
|
|
return SRAM_values;
|
|
}
|
|
}
|
|
}
|
|
auto expanded_truth_table = expand_truth_table(truth_table, LUT_size);
|
|
std::vector<vtr::LogicValue> lut_mask = truth_table_to_lut_mask(expanded_truth_table, LUT_size);
|
|
|
|
VTR_ASSERT(lut_mask.size() == (size_t)num_SRAM_bits);
|
|
|
|
//Convert to string
|
|
for (size_t i = 0; i < lut_mask.size(); ++i) {
|
|
switch (lut_mask[i]) {
|
|
case vtr::LogicValue::TRUE:
|
|
SRAM_values[i] = '1';
|
|
break;
|
|
case vtr::LogicValue::FALSE:
|
|
SRAM_values[i] = '0';
|
|
break;
|
|
default:
|
|
VTR_ASSERT(false);
|
|
}
|
|
}
|
|
|
|
return SRAM_values;
|
|
}
|
|
|
|
/* Reduce mux levels for multiplexers that are too small for the preset number of levels */
|
|
void mux_arch_fix_levels(t_mux_arch* mux_arch) {
|
|
while (((1 << mux_arch->levels) > mux_arch->num_inputs)
|
|
&& (mux_arch->levels > 1)) {
|
|
mux_arch->levels--;
|
|
}
|
|
}
|
|
|
|
/**
 * Returns the transition density stored in the power context for a
 * clustered net.
 *  - net_idx: The clustered net; ClusterNetId::INVALID() yields 0
 */
float clb_net_density(ClusterNetId net_idx) {
    if (net_idx == ClusterNetId::INVALID()) {
        return 0.;
    }

    auto& power_ctx = g_vpr_ctx.power();
    return power_ctx.clb_net_power[net_idx].density;
}
|
|
|
|
/**
 * Returns the signal probability stored in the power context for a
 * clustered net.
 *  - net_idx: The clustered net; ClusterNetId::INVALID() yields 0
 */
float clb_net_prob(ClusterNetId net_idx) {
    if (net_idx == ClusterNetId::INVALID()) {
        return 0.;
    }

    auto& power_ctx = g_vpr_ctx.power();
    return power_ctx.clb_net_power[net_idx].probability;
}
|
|
|
|
const char* interconnect_type_name(enum e_interconnect type) {
|
|
switch (type) {
|
|
case COMPLETE_INTERC:
|
|
return "complete";
|
|
case MUX_INTERC:
|
|
return "mux";
|
|
case DIRECT_INTERC:
|
|
return "direct";
|
|
default:
|
|
return "";
|
|
}
|
|
}
|
|
|
|
/**
 * Writes every message of a log to a file, one message per line.
 *  - log_ptr: The log whose messages are written, in stored order
 *  - fp: The output stream
 */
void output_log(t_log* log_ptr, FILE* fp) {
    const int message_count = log_ptr->num_messages;

    int written = 0;
    while (written < message_count) {
        fprintf(fp, "%s\n", log_ptr->messages[written]);
        ++written;
    }
}
|
|
|
|
void output_logs(FILE* fp, t_log* logs, int num_logs) {
|
|
int log_idx;
|
|
|
|
for (log_idx = 0; log_idx < num_logs; log_idx++) {
|
|
if (logs[log_idx].num_messages) {
|
|
power_print_title(fp, logs[log_idx].name);
|
|
output_log(&logs[log_idx], fp);
|
|
fprintf(fp, "\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
float power_buffer_size_from_logical_effort(float C_load) {
|
|
auto& power_ctx = g_vpr_ctx.power();
|
|
return std::max(1.0f,
|
|
C_load / power_ctx.commonly_used->INV_1X_C_in / (2 * power_ctx.arch->logical_effort_factor));
|
|
}
|
|
|
|
/**
 * Prints a title centred in an 80-column line, padded with '-' characters
 * on both sides, e.g. "----- Title -----", followed by a newline.
 *  - fp: The output stream
 *  - title: The NUL-terminated title text
 *
 * When the padding cannot be split evenly, the extra dash goes on the right.
 * Titles too long to leave room for padding are printed without dashes.
 */
void power_print_title(FILE* fp, const char* title) {
    const int width = 80;

    /* Do the arithmetic in signed integers: strlen() returns an unsigned
     * size_t, so "width - strlen(title) - 2" would wrap around to a huge
     * value for titles longer than width - 2 characters. */
    const int title_len = (int)strlen(title);
    int padding = width - title_len - 2; /* the 2 accounts for the spaces around the title */
    if (padding < 0) {
        padding = 0;
    }

    const int firsthalf = padding / 2;
    const int secondhalf = padding - firsthalf;

    for (int i = 0; i < firsthalf; i++)
        fprintf(fp, "-");
    fprintf(fp, " %s ", title);
    for (int i = 0; i < secondhalf; i++)
        fprintf(fp, "-");
    fprintf(fp, "\n");
}
|
|
|
|
/**
 * Returns the default mux architecture for a mux with the given number of
 * inputs and transistor size, creating it on first use.
 *  - num_mux_inputs: Number of inputs of the requested mux
 *  - transistor_size: Transistor size; used as the key into the power
 *                     context's mux_info map
 *
 * Architectures are kept per transistor size in an array indexed by input
 * count. When a larger input count than previously seen is requested, the
 * array is grown and every new entry is initialized as a default 2-level
 * mux. The returned pointer refers into that array, so a later call that
 * grows the array (vtr::realloc) may invalidate it.
 */
t_mux_arch* power_get_mux_arch(int num_mux_inputs, float transistor_size) {
    auto& power_ctx = g_vpr_ctx.power();
    auto& mux_info_by_size = power_ctx.commonly_used->mux_info;

    /* Find the mux archs for the given transistor size */
    t_power_mux_info* mux_info = nullptr;
    auto found = mux_info_by_size.find(transistor_size);

    if (found != mux_info_by_size.end()) {
        mux_info = found->second;
    } else {
        /* First request for this transistor size: register an empty record */
        mux_info = new t_power_mux_info;
        mux_info->mux_arch = nullptr;
        mux_info->mux_arch_max_size = 0;
        VTR_ASSERT(mux_info_by_size[transistor_size] == nullptr);
        mux_info_by_size[transistor_size] = mux_info;
    }

    const int previous_max = mux_info->mux_arch_max_size;
    if (num_mux_inputs > previous_max) {
        /* Entries are indexed directly by input count, hence the + 1 */
        mux_info->mux_arch = (t_mux_arch*)vtr::realloc(mux_info->mux_arch,
                                                       (num_mux_inputs + 1) * sizeof(t_mux_arch));

        /* Only the newly added sizes need to be initialized */
        for (int size = previous_max + 1; size <= num_mux_inputs; ++size) {
            init_mux_arch_default(&mux_info->mux_arch[size], 2, size,
                                  transistor_size);
        }
        mux_info->mux_arch_max_size = num_mux_inputs;
    }

    return &mux_info->mux_arch[num_mux_inputs];
}
|
|
|
|
/**
|
|
* Generates a default multiplexer architecture of given size and number of levels
|
|
*/
|
|
static void init_mux_arch_default(t_mux_arch* mux_arch, int levels, int num_inputs, float transistor_size) {
|
|
mux_arch->levels = levels;
|
|
mux_arch->num_inputs = num_inputs;
|
|
|
|
mux_arch_fix_levels(mux_arch);
|
|
|
|
mux_arch->transistor_size = transistor_size;
|
|
|
|
mux_arch->mux_graph_head = alloc_and_load_mux_graph(num_inputs,
|
|
mux_arch->levels);
|
|
}
|
|
|
|
/**
|
|
* Allocates a builds a multiplexer graph with given # inputs and levels
|
|
*/
|
|
static t_mux_node* alloc_and_load_mux_graph(int num_inputs, int levels) {
|
|
t_mux_node* node;
|
|
|
|
node = (t_mux_node*)vtr::malloc(sizeof(t_mux_node));
|
|
alloc_and_load_mux_graph_recursive(node, num_inputs, levels - 1, 0);
|
|
|
|
return node;
|
|
}
|
|
|
|
/**
 * Fills in one node of a multiplexer graph and, for nodes above level 0,
 * allocates and recursively builds its children.
 *  - node: The (already allocated) node to fill in
 *  - num_primary_inputs: Number of primary mux inputs below this node
 *  - level: Level of this node; level 0 nodes get no children
 *  - starting_pin_idx: Index of the first primary input below this node
 *
 * The node's fan-in is the (level + 1)-th root of num_primary_inputs,
 * rounded to the nearest integer. Primary inputs are divided among the
 * children as evenly as possible, with the earlier children receiving the
 * remainder.
 */
static void alloc_and_load_mux_graph_recursive(t_mux_node* node,
                                               int num_primary_inputs,
                                               int level,
                                               int starting_pin_idx) {
    node->level = level;
    node->starting_pin_idx = starting_pin_idx;
    node->num_inputs = (int)(pow(num_primary_inputs, 1 / ((float)level + 1))
                             + 0.5);

    /* Level 0 nodes connect directly to primary inputs */
    if (level == 0) {
        return;
    }

    node->children = (t_mux_node*)vtr::calloc(node->num_inputs,
                                              sizeof(t_mux_node));

    int next_pin_idx = starting_pin_idx;
    for (int child = 0; child < node->num_inputs; ++child) {
        /* Even share, plus one extra input for the first (remainder) children */
        const bool gets_extra = child < (num_primary_inputs % node->num_inputs);
        const int child_inputs = num_primary_inputs / node->num_inputs + (gets_extra ? 1 : 0);

        alloc_and_load_mux_graph_recursive(&node->children[child],
                                           child_inputs, level - 1, next_pin_idx);
        next_pin_idx += child_inputs;
    }
}
|
|
|
|
/**
 * Returns true for POWER_METHOD_AUTO_SIZES and POWER_METHOD_SPECIFY_SIZES,
 * and false for every other estimation method.
 *  - estimation_method: The power estimation method to classify
 */
bool power_method_is_transistor_level(e_power_estimation_method estimation_method) {
    return estimation_method == POWER_METHOD_AUTO_SIZES
           || estimation_method == POWER_METHOD_SPECIFY_SIZES;
}
|
|
|
|
/**
 * Classifies a power estimation method as recursive or not.
 *  - method: The power estimation method to classify
 * Returns true for AUTO_SIZES, SPECIFY_SIZES and SUM_OF_CHILDREN, and false
 * for IGNORE, TOGGLE_PINS, C_INTERNAL and ABSOLUTE. POWER_METHOD_UNDEFINED
 * and any unlisted value trigger VTR_ASSERT(0); false is returned if
 * execution continues past the assertion.
 */
bool power_method_is_recursive(e_power_estimation_method method) {
    switch (method) {
        case POWER_METHOD_AUTO_SIZES:
        case POWER_METHOD_SPECIFY_SIZES:
        case POWER_METHOD_SUM_OF_CHILDREN:
            return true;

        case POWER_METHOD_IGNORE:
        case POWER_METHOD_TOGGLE_PINS:
        case POWER_METHOD_C_INTERNAL:
        case POWER_METHOD_ABSOLUTE:
            return false;

        case POWER_METHOD_UNDEFINED:
        default:
            break;
    }

    /* Reaching this point means the method is undefined or unknown */
    VTR_ASSERT(0);

    // to get rid of warning
    return false;
}
|