Merge pull request #114 from LNIS-Projects/dev
Support I/O interfaces for Embedded FPGAs
This commit is contained in:
commit
5d41cc6d23
|
@ -43,6 +43,9 @@ python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/io/multi_io_capacity
|
|||
echo -e "Testing Verilog generation with I/Os only on left and right sides of an FPGA ";
|
||||
python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/io/reduced_io --debug --show_thread_logs
|
||||
|
||||
echo -e "Testing Verilog generation with embedded I/Os for an FPGA ";
|
||||
python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/io/embedded_io --debug --show_thread_logs
|
||||
|
||||
echo -e "Testing Verilog generation with adder chain across an FPGA";
|
||||
python3 openfpga_flow/scripts/run_fpga_task.py fpga_verilog/fabric_chain/adder_chain --debug --show_thread_logs
|
||||
|
||||
|
|
|
@ -562,6 +562,66 @@ int check_power_gated_circuit_models(const CircuitLibrary& circuit_lib) {
|
|||
return num_err;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* Check io has been defined and has input and output ports
|
||||
* - We must have global I/O port, either its type is inout, input or output
|
||||
* - For each IOPAD, we must have at least an input an output
|
||||
***********************************************************************/
|
||||
static
|
||||
size_t check_io_circuit_model(const CircuitLibrary& circuit_lib) {
|
||||
size_t num_err = 0;
|
||||
|
||||
/* Embedded I/O interface may not have inout port
|
||||
* iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_INOUT);
|
||||
* Some I/Os may not have SRAM port, such as AIB interface
|
||||
* iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_SRAM);
|
||||
*/
|
||||
std::vector<enum e_circuit_model_port_type> iopad_port_types_required;
|
||||
iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_INOUT);
|
||||
num_err += check_circuit_model_port_required(circuit_lib, CIRCUIT_MODEL_IOPAD, iopad_port_types_required);
|
||||
|
||||
/* Each I/O cell must have
|
||||
* - One of the following ports
|
||||
* - At least 1 ASIC-to-FPGA (A2F) port that is defined as global I/O
|
||||
* - At least 1 FPGA-to-ASIC (F2A) port that is defined as global I/O!
|
||||
* - At least 1 regular port that is non-global which is connected to global routing architecture
|
||||
*/
|
||||
for (const auto& io_model : circuit_lib.models_by_type(CIRCUIT_MODEL_IOPAD)) {
|
||||
bool has_global_io = false;
|
||||
bool has_internal_connection = false;
|
||||
|
||||
for (const auto& port : circuit_lib.model_ports(io_model)) {
|
||||
if ( (true == circuit_lib.port_is_io(port)
|
||||
&& (true == circuit_lib.port_is_global(port)))) {
|
||||
has_global_io = true;
|
||||
continue; /* Go to next */
|
||||
}
|
||||
if ( (false == circuit_lib.port_is_io(port)
|
||||
&& (false == circuit_lib.port_is_global(port)))
|
||||
&& (CIRCUIT_MODEL_PORT_SRAM != circuit_lib.port_type(port))) {
|
||||
has_internal_connection = true;
|
||||
continue; /* Go to next */
|
||||
}
|
||||
}
|
||||
|
||||
if (false == has_global_io) {
|
||||
VTR_LOGF_ERROR(__FILE__, __LINE__,
|
||||
"I/O circuit model '%s' does not have any I/O port defined!\n",
|
||||
circuit_lib.model_name(io_model).c_str());
|
||||
num_err++;
|
||||
}
|
||||
|
||||
if (false == has_internal_connection) {
|
||||
VTR_LOGF_ERROR(__FILE__, __LINE__,
|
||||
"I/O circuit model '%s' does not have any port connected to FPGA core!\n",
|
||||
circuit_lib.model_name(io_model).c_str());
|
||||
num_err++;
|
||||
}
|
||||
}
|
||||
|
||||
return num_err;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* Check points to make sure we have a valid circuit library
|
||||
* Detailed checkpoints:
|
||||
|
@ -575,6 +635,10 @@ int check_power_gated_circuit_models(const CircuitLibrary& circuit_lib) {
|
|||
* 8. FF must have at least a clock, an input and an output ports
|
||||
* 9. LUT must have at least an input, an output and a SRAM ports
|
||||
* 10. We must have default circuit models for these types: MUX, channel wires and wires
|
||||
*
|
||||
* Note:
|
||||
* - NO modification on the circuit library is allowed!
|
||||
* The circuit library should be read-only!!!
|
||||
***********************************************************************/
|
||||
bool check_circuit_library(const CircuitLibrary& circuit_lib) {
|
||||
size_t num_err = 0;
|
||||
|
@ -595,20 +659,11 @@ bool check_circuit_library(const CircuitLibrary& circuit_lib) {
|
|||
num_err += check_circuit_library_ports(circuit_lib);
|
||||
|
||||
/* 3. Check io has been defined and has input and output ports
|
||||
* [a] We must have an IOPAD!
|
||||
* [b] For each IOPAD, we must have at least an input, an output, an INOUT and an SRAM port
|
||||
* [a] We must have global I/O port, either its type is inout, input or output
|
||||
* [b] For each IOPAD, we must have at least an input an output
|
||||
*/
|
||||
num_err += check_circuit_model_required(circuit_lib, CIRCUIT_MODEL_IOPAD);
|
||||
|
||||
std::vector<enum e_circuit_model_port_type> iopad_port_types_required;
|
||||
iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_INPUT);
|
||||
iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_OUTPUT);
|
||||
iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_INOUT);
|
||||
/* Some I/Os may not have SRAM port, such as AIB interface
|
||||
* iopad_port_types_required.push_back(CIRCUIT_MODEL_PORT_SRAM);
|
||||
*/
|
||||
|
||||
num_err += check_circuit_model_port_required(circuit_lib, CIRCUIT_MODEL_IOPAD, iopad_port_types_required);
|
||||
num_err += check_io_circuit_model(circuit_lib);
|
||||
|
||||
/* 4. Check mux has been defined and has input and output ports
|
||||
* [a] We must have a MUX!
|
||||
|
|
|
@ -11,7 +11,10 @@ namespace openfpga {
|
|||
/**************************************************
|
||||
* Public Accessors
|
||||
*************************************************/
|
||||
size_t IoLocationMap::io_index(const size_t& x, const size_t& y, const size_t& z) const {
|
||||
size_t IoLocationMap::io_index(const size_t& x,
|
||||
const size_t& y,
|
||||
const size_t& z,
|
||||
const std::string& io_port_name) const {
|
||||
if (x >= io_indices_.size()) {
|
||||
return size_t(-1);
|
||||
}
|
||||
|
@ -24,10 +27,19 @@ size_t IoLocationMap::io_index(const size_t& x, const size_t& y, const size_t& z
|
|||
return size_t(-1);
|
||||
}
|
||||
|
||||
return io_indices_[x][y][z];
|
||||
auto result = io_indices_[x][y][z].find(io_port_name);
|
||||
if (result == io_indices_[x][y][z].end()) {
|
||||
return size_t(-1);
|
||||
}
|
||||
|
||||
void IoLocationMap::set_io_index(const size_t& x, const size_t& y, const size_t& z, const size_t& io_index) {
|
||||
return result->second;
|
||||
}
|
||||
|
||||
void IoLocationMap::set_io_index(const size_t& x,
|
||||
const size_t& y,
|
||||
const size_t& z,
|
||||
const std::string& io_port_name,
|
||||
const size_t& io_index) {
|
||||
if (x >= io_indices_.size()) {
|
||||
io_indices_.resize(x + 1);
|
||||
}
|
||||
|
@ -40,7 +52,7 @@ void IoLocationMap::set_io_index(const size_t& x, const size_t& y, const size_t&
|
|||
io_indices_[x][y].resize(z + 1);
|
||||
}
|
||||
|
||||
io_indices_[x][y][z] = io_index;
|
||||
io_indices_[x][y][z][io_port_name] = io_index;
|
||||
}
|
||||
|
||||
} /* end namespace openfpga */
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
*******************************************************************/
|
||||
#include <stddef.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
/* Begin namespace openfpga */
|
||||
namespace openfpga {
|
||||
|
@ -15,24 +17,32 @@ namespace openfpga {
|
|||
* in the FPGA fabric, i.e., the module graph, and logical location
|
||||
* of the I/O in VPR coordinate system
|
||||
*
|
||||
* For example
|
||||
* io[0] io[1] io[2]
|
||||
* +-----------------+ +--------+
|
||||
* | | | | |
|
||||
* | I/O | I/O | | I/O |
|
||||
* | [0][y] | [0][y] | | [1][y] |
|
||||
* | [0] | [1] | | [0] |
|
||||
* +-----------------+ +--------+
|
||||
* For example:
|
||||
*
|
||||
* ioA[0] ioA[1] ioB[0] ioB[1] ioA[2]
|
||||
* +-----------------+ +--------+--------+ +--------+
|
||||
* | | | | | | | |
|
||||
* | I/O | I/O | | I/O | I/O | | I/O |
|
||||
* | [0][y] | [0][y] | | [1][y] | [1][y] | | [2][y] | ...
|
||||
* | [0] | [1] | | [0] | [1] | | [0] |
|
||||
* +-----------------+ +--------+--------+ +--------+
|
||||
*
|
||||
*******************************************************************/
|
||||
class IoLocationMap {
|
||||
public: /* Public aggregators */
|
||||
size_t io_index(const size_t& x, const size_t& y, const size_t& z) const;
|
||||
size_t io_index(const size_t& x,
|
||||
const size_t& y,
|
||||
const size_t& z,
|
||||
const std::string& io_port_name) const;
|
||||
public: /* Public mutators */
|
||||
void set_io_index(const size_t& x, const size_t& y, const size_t& z, const size_t& io_index);
|
||||
void set_io_index(const size_t& x,
|
||||
const size_t& y,
|
||||
const size_t& z,
|
||||
const std::string& io_port_name,
|
||||
const size_t& io_index);
|
||||
private: /* Internal Data */
|
||||
/* I/O index fast lookup by [x][y][z] location */
|
||||
std::vector<std::vector<std::vector<size_t>>> io_indices_;
|
||||
std::vector<std::vector<std::vector<std::map<std::string, size_t>>>> io_indices_;
|
||||
};
|
||||
|
||||
} /* End namespace openfpga*/
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "build_device_module.h"
|
||||
#include "fabric_hierarchy_writer.h"
|
||||
#include "fabric_key_writer.h"
|
||||
#include "build_fabric_io_location_map.h"
|
||||
#include "openfpga_build_fabric.h"
|
||||
|
||||
/* Include global variables of VPR */
|
||||
|
@ -100,7 +101,6 @@ int build_fabric(OpenfpgaContext& openfpga_ctx,
|
|||
VTR_LOG("\n");
|
||||
|
||||
curr_status = build_device_module_graph(openfpga_ctx.mutable_module_graph(),
|
||||
openfpga_ctx.mutable_io_location_map(),
|
||||
openfpga_ctx.mutable_decoder_lib(),
|
||||
const_cast<const OpenfpgaContext&>(openfpga_ctx),
|
||||
g_vpr_ctx.device(),
|
||||
|
@ -116,6 +116,10 @@ int build_fabric(OpenfpgaContext& openfpga_ctx,
|
|||
final_status = curr_status;
|
||||
}
|
||||
|
||||
/* Build I/O location map */
|
||||
openfpga_ctx.mutable_io_location_map() = build_fabric_io_location_map(openfpga_ctx.module_graph(),
|
||||
g_vpr_ctx.device().grid);
|
||||
|
||||
/* Output fabric key if user requested */
|
||||
if (true == cmd_context.option_enable(cmd, opt_write_fabric_key)) {
|
||||
std::string fkey_fname = cmd_context.option_value(cmd, opt_write_fabric_key);
|
||||
|
|
|
@ -30,7 +30,6 @@ namespace openfpga {
|
|||
* for a FPGA fabric
|
||||
*******************************************************************/
|
||||
int build_device_module_graph(ModuleManager& module_manager,
|
||||
IoLocationMap& io_location_map,
|
||||
DecoderLibrary& decoder_lib,
|
||||
const OpenfpgaContext& openfpga_ctx,
|
||||
const DeviceContext& vpr_device_ctx,
|
||||
|
@ -112,7 +111,6 @@ int build_device_module_graph(ModuleManager& module_manager,
|
|||
|
||||
/* Build FPGA fabric top-level module */
|
||||
status = build_top_module(module_manager,
|
||||
io_location_map,
|
||||
decoder_lib,
|
||||
openfpga_ctx.arch().circuit_lib,
|
||||
vpr_device_ctx.grid,
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
namespace openfpga {
|
||||
|
||||
int build_device_module_graph(ModuleManager& module_manager,
|
||||
IoLocationMap& io_location_map,
|
||||
DecoderLibrary& decoder_lib,
|
||||
const OpenfpgaContext& openfpga_ctx,
|
||||
const DeviceContext& vpr_device_ctx,
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
/********************************************************************
|
||||
* This file includes functions that are used to build the location
|
||||
* map information for the top-level module of the FPGA fabric
|
||||
* It helps OpenFPGA to link the I/O port index in top-level module
|
||||
* to the VPR I/O mapping results
|
||||
*******************************************************************/
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
/* Headers from vtrutil library */
|
||||
#include "vtr_assert.h"
|
||||
#include "vtr_time.h"
|
||||
#include "vtr_log.h"
|
||||
|
||||
/* Headers from vpr library */
|
||||
#include "vpr_utils.h"
|
||||
|
||||
#include "openfpga_reserved_words.h"
|
||||
#include "openfpga_naming.h"
|
||||
|
||||
#include "build_fabric_io_location_map.h"
|
||||
|
||||
/* begin namespace openfpga */
|
||||
namespace openfpga {
|
||||
|
||||
/********************************************************************
|
||||
* Find all the GPIO ports in the grid module
|
||||
* and cache their port/pin index in the top-level module
|
||||
*******************************************************************/
|
||||
IoLocationMap build_fabric_io_location_map(const ModuleManager& module_manager,
|
||||
const DeviceGrid& grids) {
|
||||
vtr::ScopedStartFinishTimer timer("Create I/O location mapping for top module");
|
||||
|
||||
IoLocationMap io_location_map;
|
||||
|
||||
std::map<std::string, size_t> io_counter;
|
||||
|
||||
/* Create the coordinate range for each side of FPGA fabric */
|
||||
std::vector<e_side> io_sides{TOP, RIGHT, BOTTOM, LEFT};
|
||||
std::map<e_side, std::vector<vtr::Point<size_t>>> io_coordinates;
|
||||
|
||||
/* TOP side*/
|
||||
for (size_t ix = 1; ix < grids.width() - 1; ++ix) {
|
||||
io_coordinates[TOP].push_back(vtr::Point<size_t>(ix, grids.height() - 1));
|
||||
}
|
||||
|
||||
/* RIGHT side */
|
||||
for (size_t iy = 1; iy < grids.height() - 1; ++iy) {
|
||||
io_coordinates[RIGHT].push_back(vtr::Point<size_t>(grids.width() - 1, iy));
|
||||
}
|
||||
|
||||
/* BOTTOM side*/
|
||||
for (size_t ix = 1; ix < grids.width() - 1; ++ix) {
|
||||
io_coordinates[BOTTOM].push_back(vtr::Point<size_t>(ix, 0));
|
||||
}
|
||||
|
||||
/* LEFT side */
|
||||
for (size_t iy = 1; iy < grids.height() - 1; ++iy) {
|
||||
io_coordinates[LEFT].push_back(vtr::Point<size_t>(0, iy));
|
||||
}
|
||||
|
||||
/* Walk through all the grids on the perimeter, which are I/O grids */
|
||||
for (const e_side& io_side : io_sides) {
|
||||
for (const vtr::Point<size_t>& io_coordinate : io_coordinates[io_side]) {
|
||||
/* Bypass EMPTY grid */
|
||||
if (true == is_empty_type(grids[io_coordinate.x()][io_coordinate.y()].type)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Skip width or height > 1 tiles (mostly heterogeneous blocks) */
|
||||
if ( (0 < grids[io_coordinate.x()][io_coordinate.y()].width_offset)
|
||||
|| (0 < grids[io_coordinate.x()][io_coordinate.y()].height_offset)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
t_physical_tile_type_ptr grid_type = grids[io_coordinate.x()][io_coordinate.y()].type;
|
||||
|
||||
/* Find the module name for this type of grid */
|
||||
std::string grid_module_name_prefix(GRID_MODULE_NAME_PREFIX);
|
||||
std::string grid_module_name = generate_grid_block_module_name(grid_module_name_prefix, std::string(grid_type->name), is_io_type(grid_type), io_side);
|
||||
ModuleId grid_module = module_manager.find_module(grid_module_name);
|
||||
VTR_ASSERT(true == module_manager.valid_module_id(grid_module));
|
||||
|
||||
/* Find all the GPIO ports in the grid module */
|
||||
|
||||
/* MUST DO: register in io location mapping!
|
||||
* I/O location mapping is a critical look-up for testbench generators
|
||||
* As we add the I/O grid instances to top module by following order:
|
||||
* TOP -> RIGHT -> BOTTOM -> LEFT
|
||||
* The I/O index will increase in this way as well.
|
||||
* This organization I/O indices is also consistent to the way
|
||||
* that GPIOs are wired in function connect_gpio_module()
|
||||
*
|
||||
* Note: if you change the GPIO function, you should update here as well!
|
||||
*/
|
||||
for (int z = 0; z < grids[io_coordinate.x()][io_coordinate.y()].type->capacity; ++z) {
|
||||
for (const BasicPort& gpio_port : module_manager.module_ports_by_type(grid_module, ModuleManager::MODULE_GPIO_PORT)) {
|
||||
auto curr_io_index = io_counter.find(gpio_port.get_name());
|
||||
/* Index always start from zero */
|
||||
if (curr_io_index == io_counter.end()) {
|
||||
io_counter[gpio_port.get_name()] = 0;
|
||||
}
|
||||
io_location_map.set_io_index(io_coordinate.x(), io_coordinate.y(), z,
|
||||
gpio_port.get_name(),
|
||||
io_counter[gpio_port.get_name()]);
|
||||
io_counter[gpio_port.get_name()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check all the GPIO ports in the top-level module has been mapped */
|
||||
std::string top_module_name = generate_fpga_top_module_name();
|
||||
ModuleId top_module = module_manager.find_module(top_module_name);
|
||||
VTR_ASSERT(true == module_manager.valid_module_id(top_module));
|
||||
|
||||
for (const BasicPort& gpio_port : module_manager.module_ports_by_type(top_module, ModuleManager::MODULE_GPIO_PORT)) {
|
||||
VTR_ASSERT(io_counter[gpio_port.get_name()] == gpio_port.get_width());
|
||||
}
|
||||
|
||||
return io_location_map;
|
||||
}
|
||||
|
||||
} /* end namespace openfpga */
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef BUILD_FABRIC_IO_LOCATION_MAP_H
|
||||
#define BUILD_FABRIC_IO_LOCATION_MAP_H
|
||||
|
||||
/********************************************************************
|
||||
* Include header files that are required by function declaration
|
||||
*******************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include "device_grid.h"
|
||||
#include "io_location_map.h"
|
||||
#include "module_manager.h"
|
||||
|
||||
/********************************************************************
|
||||
* Function declaration
|
||||
*******************************************************************/
|
||||
|
||||
/* begin namespace openfpga */
|
||||
namespace openfpga {
|
||||
|
||||
IoLocationMap build_fabric_io_location_map(const ModuleManager& module_manager,
|
||||
const DeviceGrid& grids);
|
||||
|
||||
} /* end namespace openfpga */
|
||||
|
||||
#endif
|
|
@ -92,7 +92,6 @@ size_t add_top_module_grid_instance(ModuleManager& module_manager,
|
|||
static
|
||||
vtr::Matrix<size_t> add_top_module_grid_instances(ModuleManager& module_manager,
|
||||
const ModuleId& top_module,
|
||||
IoLocationMap& io_location_map,
|
||||
const DeviceGrid& grids) {
|
||||
|
||||
vtr::ScopedStartFinishTimer timer("Add grid instances to top module");
|
||||
|
@ -178,21 +177,6 @@ vtr::Matrix<size_t> add_top_module_grid_instances(ModuleManager& module_manager,
|
|||
|
||||
/* Add a grid module to top_module*/
|
||||
grid_instance_ids[io_coordinate.x()][io_coordinate.y()] = add_top_module_grid_instance(module_manager, top_module, grids[io_coordinate.x()][io_coordinate.y()].type, io_side, io_coordinate);
|
||||
|
||||
/* MUST DO: register in io location mapping!
|
||||
* I/O location mapping is a critical look-up for testbench generators
|
||||
* As we add the I/O grid instances to top module by following order:
|
||||
* TOP -> RIGHT -> BOTTOM -> LEFT
|
||||
* The I/O index will increase in this way as well.
|
||||
* This organization I/O indices is also consistent to the way
|
||||
* that GPIOs are wired in function connect_gpio_module()
|
||||
*
|
||||
* Note: if you change the GPIO function, you should update here as well!
|
||||
*/
|
||||
for (int z = 0; z < grids[io_coordinate.x()][io_coordinate.y()].type->capacity; ++z) {
|
||||
io_location_map.set_io_index(io_coordinate.x(), io_coordinate.y(), z, io_counter);
|
||||
io_counter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +306,6 @@ vtr::Matrix<size_t> add_top_module_connection_block_instances(ModuleManager& mod
|
|||
* 5. Add module nets/submodules to connect configuration ports
|
||||
*******************************************************************/
|
||||
int build_top_module(ModuleManager& module_manager,
|
||||
IoLocationMap& io_location_map,
|
||||
DecoderLibrary& decoder_lib,
|
||||
const CircuitLibrary& circuit_lib,
|
||||
const DeviceGrid& grids,
|
||||
|
@ -353,7 +336,7 @@ int build_top_module(ModuleManager& module_manager,
|
|||
|
||||
/* Add sub modules, which are grid, SB and CBX/CBY modules as instances */
|
||||
/* Add all the grids across the fabric */
|
||||
vtr::Matrix<size_t> grid_instance_ids = add_top_module_grid_instances(module_manager, top_module, io_location_map, grids);
|
||||
vtr::Matrix<size_t> grid_instance_ids = add_top_module_grid_instances(module_manager, top_module, grids);
|
||||
/* Add all the SBs across the fabric */
|
||||
vtr::Matrix<size_t> sb_instance_ids = add_top_module_switch_block_instances(module_manager, top_module, device_rr_gsb, compact_routing_hierarchy);
|
||||
/* Add all the CBX and CBYs across the fabric */
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "arch_direct.h"
|
||||
#include "config_protocol.h"
|
||||
#include "module_manager.h"
|
||||
#include "io_location_map.h"
|
||||
#include "fabric_key.h"
|
||||
|
||||
/********************************************************************
|
||||
|
@ -27,7 +26,6 @@
|
|||
namespace openfpga {
|
||||
|
||||
int build_top_module(ModuleManager& module_manager,
|
||||
IoLocationMap& io_location_map,
|
||||
DecoderLibrary& decoder_lib,
|
||||
const CircuitLibrary& circuit_lib,
|
||||
const DeviceGrid& grids,
|
||||
|
|
|
@ -118,7 +118,12 @@ void print_analysis_sdc_io_delays(std::fstream& fp,
|
|||
/* Find the index of the mapped GPIO in top-level FPGA fabric */
|
||||
size_t io_index = io_location_map.io_index(place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.x,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.y,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z);
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z,
|
||||
module_io_port.get_name());
|
||||
|
||||
if (size_t(-1) == io_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Ensure that IO index is in range */
|
||||
BasicPort module_mapped_io_port = module_io_port;
|
||||
|
|
|
@ -464,9 +464,6 @@ void print_verilog_gate_module(const ModuleManager& module_manager,
|
|||
/* Print timing info */
|
||||
print_verilog_submodule_timing(fp, circuit_lib, circuit_model);
|
||||
|
||||
/* Print signal initialization */
|
||||
print_verilog_submodule_signal_init(fp, circuit_lib, circuit_model);
|
||||
|
||||
/* Put an end to the Verilog module */
|
||||
print_verilog_module_end(fp, circuit_lib.model_name(circuit_model));
|
||||
}
|
||||
|
|
|
@ -107,7 +107,8 @@ void print_verilog_simulation_info(const std::string& ini_fname,
|
|||
* For unused ports, by default we assume it is configured as inputs
|
||||
* TODO: this should be reworked to be consistent with bitstream
|
||||
*/
|
||||
std::string io_direction(total_gpio_width, '1');
|
||||
for (const BasicPort& module_io_port : module_manager.module_ports_by_type(top_module, ModuleManager::MODULE_GPIO_PORT)) {
|
||||
std::string io_direction(module_io_port.get_width(), '1');
|
||||
for (const AtomBlockId& atom_blk : atom_ctx.nlist.blocks()) {
|
||||
/* Bypass non-I/O atom blocks ! */
|
||||
if ( (AtomBlockType::INPAD != atom_ctx.nlist.block_type(atom_blk))
|
||||
|
@ -118,7 +119,12 @@ void print_verilog_simulation_info(const std::string& ini_fname,
|
|||
/* Find the index of the mapped GPIO in top-level FPGA fabric */
|
||||
size_t io_index = io_location_map.io_index(place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.x,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.y,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z);
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z,
|
||||
module_io_port.get_name());
|
||||
|
||||
if (size_t(-1) == io_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (AtomBlockType::INPAD == atom_ctx.nlist.block_type(atom_blk)) {
|
||||
io_direction[io_index] = '1';
|
||||
|
@ -126,10 +132,13 @@ void print_verilog_simulation_info(const std::string& ini_fname,
|
|||
VTR_ASSERT(AtomBlockType::OUTPAD == atom_ctx.nlist.block_type(atom_blk));
|
||||
io_direction[io_index] = '0';
|
||||
}
|
||||
}
|
||||
|
||||
std::string io_tag = "IO" + module_io_port.get_name();
|
||||
|
||||
/* Organize the vector to string */
|
||||
ini["SIMULATION_DECK"]["IO"] = io_direction;
|
||||
ini["SIMULATION_DECK"][io_tag] = io_direction;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mINI::INIFile file(ini_fname);
|
||||
|
|
|
@ -170,7 +170,13 @@ void print_verilog_testbench_connect_fpga_ios(std::fstream& fp,
|
|||
/* Find the index of the mapped GPIO in top-level FPGA fabric */
|
||||
size_t io_index = io_location_map.io_index(place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.x,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.y,
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z);
|
||||
place_ctx.block_locs[atom_ctx.lookup.atom_clb(atom_blk)].loc.z,
|
||||
module_io_port.get_name());
|
||||
|
||||
/* Bypass invalid index (not mapped to this GPIO port) */
|
||||
if (size_t(-1) == io_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Ensure that IO index is in range */
|
||||
BasicPort module_mapped_io_port = module_io_port;
|
||||
|
|
|
@ -76,6 +76,10 @@ static bool check_lb_rr_graph_dangling_nodes(const LbRRGraph& lb_rr_graph) {
|
|||
* If so, this is a dangling nodes and report
|
||||
*/
|
||||
for (auto node : lb_rr_graph.nodes()) {
|
||||
/* Bypass 0-capacity node; They can be dangling */
|
||||
if (0 == lb_rr_graph.node_capacity(node)) {
|
||||
continue;
|
||||
}
|
||||
if ((0 == lb_rr_graph.node_in_edges(node).size())
|
||||
&& (0 == lb_rr_graph.node_out_edges(node).size())) {
|
||||
/* Print a warning! */
|
||||
|
@ -105,6 +109,10 @@ static bool check_lb_rr_graph_source_nodes(const LbRRGraph& lb_rr_graph) {
|
|||
if (LB_SOURCE != lb_rr_graph.node_type(node)) {
|
||||
continue;
|
||||
}
|
||||
/* Bypass 0-capacity node; They can be dangling */
|
||||
if (0 == lb_rr_graph.node_capacity(node)) {
|
||||
continue;
|
||||
}
|
||||
if ((0 != lb_rr_graph.node_in_edges(node).size())
|
||||
|| (0 == lb_rr_graph.node_out_edges(node).size())) {
|
||||
/* Print a warning! */
|
||||
|
@ -134,6 +142,10 @@ static bool check_lb_rr_graph_sink_nodes(const LbRRGraph& lb_rr_graph) {
|
|||
if (LB_SINK != lb_rr_graph.node_type(node)) {
|
||||
continue;
|
||||
}
|
||||
/* Bypass 0-capacity node; They can be dangling */
|
||||
if (0 == lb_rr_graph.node_capacity(node)) {
|
||||
continue;
|
||||
}
|
||||
if ((0 == lb_rr_graph.node_in_edges(node).size())
|
||||
|| (0 != lb_rr_graph.node_out_edges(node).size())) {
|
||||
/* Print a warning! */
|
||||
|
@ -184,16 +196,6 @@ bool check_lb_rr_graph(const LbRRGraph& lb_rr_graph) {
|
|||
num_err++;
|
||||
}
|
||||
|
||||
if (false == check_lb_rr_graph_source_nodes(lb_rr_graph)) {
|
||||
VTR_LOG_WARN("Fail in checking source nodes!\n");
|
||||
num_err++;
|
||||
}
|
||||
|
||||
if (false == check_lb_rr_graph_sink_nodes(lb_rr_graph)) {
|
||||
VTR_LOG_WARN("Fail in checking sink nodes!\n");
|
||||
num_err++;
|
||||
}
|
||||
|
||||
/* Error out if there is any fatal errors found */
|
||||
if (0 < num_err) {
|
||||
VTR_LOG_WARN("Checked Logical tile Routing Resource graph with %d errors !\n",
|
||||
|
|
|
@ -63,7 +63,10 @@ void print_lb_rr_node(const LbRRGraph& lb_rr_graph,
|
|||
VTR_LOG("Node id: %d\n", size_t(node));
|
||||
VTR_LOG("Node type: %s\n", lb_rr_type_str[lb_rr_graph.node_type(node)]);
|
||||
VTR_LOG("Node capacity: %d\n", lb_rr_graph.node_capacity(node));
|
||||
/* Some node, e.g., SOURCE, SINK may not have pb_graph_pin, skip outputing in this case */
|
||||
if (nullptr != lb_rr_graph.node_pb_graph_pin(node)) {
|
||||
VTR_LOG("Node pb_graph_pin: %s\n", lb_rr_graph.node_pb_graph_pin(node)->to_string().c_str());
|
||||
}
|
||||
VTR_LOG("Node intrinsic_cost: %f\n", lb_rr_graph.node_intrinsic_cost(node));
|
||||
VTR_LOG("Node num in_edges: %ld\n", lb_rr_graph.node_in_edges(node).size());
|
||||
VTR_LOG("Node num out_edges: %ld\n", lb_rr_graph.node_out_edges(node).size());
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
<!-- Architecture annotation for OpenFPGA framework
|
||||
This annotation supports the k4_frac_cc_sky130nm.xml
|
||||
- General purpose logic block
|
||||
- K = 6, N = 10, I = 40
|
||||
- Single mode
|
||||
- Routing architecture
|
||||
- L = 4, fc_in = 0.15, fc_out = 0.1
|
||||
- Skywater 130nm PDK
|
||||
- circuit models are binded to the opensource skywater
|
||||
foundry middle-speed (ms) standard cell library
|
||||
-->
|
||||
<openfpga_architecture>
|
||||
<technology_library>
|
||||
<device_library>
|
||||
<device_model name="logic" type="transistor">
|
||||
<lib type="industry" corner="TOP_TT" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
|
||||
<design vdd="0.9" pn_ratio="2"/>
|
||||
<pmos name="pch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
|
||||
<nmos name="nch" chan_length="40e-9" min_width="140e-9" variation="logic_transistor_var"/>
|
||||
</device_model>
|
||||
<device_model name="io" type="transistor">
|
||||
<lib type="academia" ref="M" path="${OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.pm"/>
|
||||
<design vdd="2.5" pn_ratio="3"/>
|
||||
<pmos name="pch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
|
||||
<nmos name="nch_25" chan_length="270e-9" min_width="320e-9" variation="io_transistor_var"/>
|
||||
</device_model>
|
||||
</device_library>
|
||||
<variation_library>
|
||||
<variation name="logic_transistor_var" abs_deviation="0.1" num_sigma="3"/>
|
||||
<variation name="io_transistor_var" abs_deviation="0.1" num_sigma="3"/>
|
||||
</variation_library>
|
||||
</technology_library>
|
||||
<circuit_library>
|
||||
<circuit_model type="inv_buf" name="sky130_fd_sc_hd__inv_1" prefix="sky130_fd_sc_hd__inv_1" is_default="true">
|
||||
<design_technology type="cmos" topology="inverter" size="1"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<port type="input" prefix="in" lib_name="A" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="Y" size="1"/>
|
||||
<delay_matrix type="rise" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
<delay_matrix type="fall" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
</circuit_model>
|
||||
<circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_2" prefix="sky130_fd_sc_hd__buf_2" is_default="false">
|
||||
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="2"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<port type="input" prefix="in" lib_name="A" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="X" size="1"/>
|
||||
<delay_matrix type="rise" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
<delay_matrix type="fall" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
</circuit_model>
|
||||
<circuit_model type="inv_buf" name="sky130_fd_sc_hd__buf_4" prefix="sky130_fd_sc_hd__buf_4" is_default="false">
|
||||
<design_technology type="cmos" topology="buffer" size="1" num_level="2" f_per_stage="4"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<port type="input" prefix="in" lib_name="A" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="X" size="1"/>
|
||||
<delay_matrix type="rise" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
<delay_matrix type="fall" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
</circuit_model>
|
||||
<circuit_model type="inv_buf" name="sky130_fd_sc_hd__inv_2" prefix="sky130_fd_sc_hd__inv_2" is_default="false">
|
||||
<design_technology type="cmos" topology="buffer" size="1"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<port type="input" prefix="in" lib_name="A" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="Y" size="1"/>
|
||||
<delay_matrix type="rise" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
<delay_matrix type="fall" in_port="in" out_port="out">
|
||||
10e-12
|
||||
</delay_matrix>
|
||||
</circuit_model>
|
||||
<circuit_model type="gate" name="sky130_fd_sc_hd__or2_1" prefix="sky130_fd_sc_hd__or2_1" is_default="true">
|
||||
<design_technology type="cmos" topology="OR"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="false"/>
|
||||
<port type="input" prefix="a" lib_name="A" size="1"/>
|
||||
<port type="input" prefix="b" lib_name="B" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="X" size="1"/>
|
||||
<delay_matrix type="rise" in_port="a b" out_port="out">
|
||||
10e-12 5e-12
|
||||
</delay_matrix>
|
||||
<delay_matrix type="fall" in_port="a b" out_port="out">
|
||||
10e-12 5e-12
|
||||
</delay_matrix>
|
||||
</circuit_model>
|
||||
<!-- Define a circuit model for the standard cell MUX2
|
||||
OpenFPGA requires the following truth table for the MUX2
|
||||
When the select signal sel is enabled, the first input, i.e., in0
|
||||
will be propagated to the output, i.e., out
|
||||
If your standard cell provider does not offer the exact truth table,
|
||||
you can simply swap the inputs as shown in the example below
|
||||
-->
|
||||
<circuit_model type="gate" name="sky130_fd_sc_hd__mux2_1" prefix="sky130_fd_sc_hd__mux2_1">
|
||||
<design_technology type="cmos" topology="MUX2"/>
|
||||
<device_technology device_model_name="logic"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="false"/>
|
||||
<port type="input" prefix="in0" lib_name="A1" size="1"/>
|
||||
<port type="input" prefix="in1" lib_name="A0" size="1"/>
|
||||
<port type="input" prefix="sel" lib_name="S" size="1"/>
|
||||
<port type="output" prefix="out" lib_name="X" size="1"/>
|
||||
</circuit_model>
|
||||
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="true">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="false"/>
|
||||
<port type="input" prefix="in" size="1"/>
|
||||
<port type="output" prefix="out" size="1"/>
|
||||
<wire_param model_type="pi" R="101" C="22.5e-15" num_level="1"/>
|
||||
<!-- model_type could be T, res_val and cap_val DON'T CARE -->
|
||||
</circuit_model>
|
||||
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="true">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="false"/>
|
||||
<port type="input" prefix="in" size="1"/>
|
||||
<port type="output" prefix="out" size="1"/>
|
||||
<wire_param model_type="pi" R="0" C="0" num_level="1"/>
|
||||
<!-- model_type could be T, res_val cap_val should be defined -->
|
||||
</circuit_model>
|
||||
<circuit_model type="mux" name="mux_tree" prefix="mux_tree" is_default="true" dump_structural_verilog="true">
|
||||
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="false"/>
|
||||
<pass_gate_logic circuit_model_name="sky130_fd_sc_hd__mux2_1"/>
|
||||
<port type="input" prefix="in" size="1"/>
|
||||
<port type="output" prefix="out" size="1"/>
|
||||
<port type="sram" prefix="sram" size="1"/>
|
||||
</circuit_model>
|
||||
<circuit_model type="mux" name="mux_tree_tapbuf" prefix="mux_tree_tapbuf" dump_structural_verilog="true">
|
||||
<design_technology type="cmos" structure="tree" add_const_input="true" const_input_val="1"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_4"/>
|
||||
<pass_gate_logic circuit_model_name="sky130_fd_sc_hd__mux2_1"/>
|
||||
<port type="input" prefix="in" size="1"/>
|
||||
<port type="output" prefix="out" size="1"/>
|
||||
<port type="sram" prefix="sram" size="1"/>
|
||||
</circuit_model>
|
||||
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
|
||||
<circuit_model type="ff" name="SDFFSRQ" prefix="SDFFSRQ" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<port type="input" prefix="D" size="1"/>
|
||||
<port type="input" prefix="DI" lib_name="SI" size="1"/>
|
||||
<port type="input" prefix="Test_en" lib_name="SE" size="1" is_global="true" default_val="0"/>
|
||||
<port type="input" prefix="reset" lib_name="RST" size="1" is_global="true" default_val="0" is_reset="true"/>
|
||||
<port type="input" prefix="set" lib_name="SET" size="1" is_global="true" default_val="0" is_set="true"/>
|
||||
<port type="output" prefix="Q" size="1"/>
|
||||
<port type="clock" prefix="clk" lib_name="CK" size="1" is_global="true" default_val="0" />
|
||||
</circuit_model>
|
||||
<circuit_model type="lut" name="frac_lut4" prefix="frac_lut4" dump_structural_verilog="true">
|
||||
<design_technology type="cmos" fracturable_lut="true"/>
|
||||
<input_buffer exist="false"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_2"/>
|
||||
<lut_input_inverter exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<lut_input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_2"/>
|
||||
<lut_intermediate_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__buf_2" location_map="-1-"/>
|
||||
<pass_gate_logic circuit_model_name="sky130_fd_sc_hd__mux2_1"/>
|
||||
<port type="input" prefix="in" size="4" tri_state_map="---1" circuit_model_name="sky130_fd_sc_hd__or2_1"/>
|
||||
<port type="output" prefix="lut3_out" size="2" lut_frac_level="3" lut_output_mask="0,1"/>
|
||||
<port type="output" prefix="lut4_out" size="1" lut_output_mask="0"/>
|
||||
<port type="sram" prefix="sram" size="16"/>
|
||||
<port type="sram" prefix="mode" size="1" mode_select="true" circuit_model_name="DFF" default_val="1"/>
|
||||
</circuit_model>
|
||||
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
|
||||
<circuit_model type="ccff" name="DFF" prefix="DFF" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/dff.v">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<port type="input" prefix="D" size="1"/>
|
||||
<port type="output" prefix="Q" size="1"/>
|
||||
<port type="output" prefix="QN" size="1"/>
|
||||
<port type="clock" prefix="prog_clk" lib_name="CK" size="1" is_global="true" default_val="0" is_prog="true"/>
|
||||
</circuit_model>
|
||||
<circuit_model type="iopad" name="GPIN" prefix="GPIN" is_default="true" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<port type="inout" prefix="PAD" lib_name="A" size="1" is_global="true" is_io="true" />
|
||||
<port type="output" prefix="inpad" lib_name="Y" size="1"/>
|
||||
</circuit_model>
|
||||
<circuit_model type="iopad" name="GPOUT" prefix="GPOUT" is_default="false" verilog_netlist="${OPENFPGA_PATH}/openfpga_flow/openfpga_cell_library/verilog/gpio.v">
|
||||
<design_technology type="cmos"/>
|
||||
<input_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<output_buffer exist="true" circuit_model_name="sky130_fd_sc_hd__inv_1"/>
|
||||
<port type="inout" prefix="PAD" lib_name="Y" size="1" is_global="true" is_io="true" />
|
||||
<port type="input" prefix="outpad" lib_name="A" size="1"/>
|
||||
</circuit_model>
|
||||
</circuit_library>
|
||||
<configuration_protocol>
|
||||
<organization type="scan_chain" circuit_model_name="DFF" num_regions="1"/>
|
||||
</configuration_protocol>
|
||||
<connection_block>
|
||||
<switch name="ipin_cblock" circuit_model_name="mux_tree_tapbuf"/>
|
||||
</connection_block>
|
||||
<switch_block>
|
||||
<switch name="L1_mux" circuit_model_name="mux_tree_tapbuf"/>
|
||||
<switch name="L2_mux" circuit_model_name="mux_tree_tapbuf"/>
|
||||
<switch name="L4_mux" circuit_model_name="mux_tree_tapbuf"/>
|
||||
</switch_block>
|
||||
<routing_segment>
|
||||
<segment name="L1" circuit_model_name="chan_segment"/>
|
||||
<segment name="L2" circuit_model_name="chan_segment"/>
|
||||
<segment name="L4" circuit_model_name="chan_segment"/>
|
||||
</routing_segment>
|
||||
<direct_connection>
|
||||
<direct name="shift_register" circuit_model_name="direct_interc" type="column" x_dir="positive" y_dir="positive"/>
|
||||
<direct name="scan_chain" circuit_model_name="direct_interc" type="column" x_dir="positive" y_dir="positive"/>
|
||||
</direct_connection>
|
||||
<pb_type_annotations>
|
||||
<!-- physical pb_type binding in complex block IO -->
|
||||
<pb_type name="gp_inpad.inpad" circuit_model_name="GPIN"/>
|
||||
<pb_type name="gp_outpad.outpad" circuit_model_name="GPOUT"/>
|
||||
<!-- End physical pb_type binding in complex block IO -->
|
||||
|
||||
<!-- physical pb_type binding in complex block CLB -->
|
||||
<!-- physical mode will be the default mode if not specified -->
|
||||
<pb_type name="clb.fle" physical_mode_name="physical"/>
|
||||
<pb_type name="clb.fle[physical].fabric.frac_logic.frac_lut4" circuit_model_name="frac_lut4" mode_bits="0"/>
|
||||
<pb_type name="clb.fle[physical].fabric.ff" circuit_model_name="SDFFSRQ"/>
|
||||
<!-- Binding operating pb_type to physical pb_type -->
|
||||
<pb_type name="clb.fle[n2_lut3].lut3inter.ble3.lut3" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4" mode_bits="1" physical_pb_type_index_factor="0.5">
|
||||
<!-- Binding the lut3 to the first 3 inputs of fracturable lut4 -->
|
||||
<port name="in" physical_mode_port="in[0:2]"/>
|
||||
<port name="out" physical_mode_port="lut3_out[0:0]" physical_mode_pin_rotate_offset="1"/>
|
||||
</pb_type>
|
||||
<pb_type name="clb.fle[n2_lut3].lut3inter.ble3.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/>
|
||||
<!-- Binding operating pb_types in mode 'ble4' -->
|
||||
<pb_type name="clb.fle[n1_lut4].ble4.lut4" physical_pb_type_name="clb.fle[physical].fabric.frac_logic.frac_lut4" mode_bits="0">
|
||||
<!-- Binding the lut4 to the first 4 inputs of fracturable lut4 -->
|
||||
<port name="in" physical_mode_port="in[0:3]"/>
|
||||
<port name="out" physical_mode_port="lut4_out"/>
|
||||
</pb_type>
|
||||
<pb_type name="clb.fle[n1_lut4].ble4.ff" physical_pb_type_name="clb.fle[physical].fabric.ff" physical_pb_type_index_factor="2" physical_pb_type_index_offset="0"/>
|
||||
<!-- Binding operating pb_types in mode 'shift_register' -->
|
||||
<pb_type name="clb.fle[shift_register].shift_reg.ff" physical_pb_type_name="clb.fle[physical].fabric.ff"/>
|
||||
<!-- End physical pb_type binding in complex block IO -->
|
||||
</pb_type_annotations>
|
||||
</openfpga_architecture>
|
|
@ -18,3 +18,23 @@ module GPIO (
|
|||
//----- when direction is disabled, the signal is propagated from data out to pad
|
||||
assign PAD = DIR ? 1'bz : A;
|
||||
endmodule
|
||||
|
||||
//-----------------------------------------------------
|
||||
// Function : A minimum input pad
|
||||
//-----------------------------------------------------
|
||||
module GPIN (
|
||||
inout A, // External PAD signal
|
||||
output Y // Data input
|
||||
);
|
||||
assign Y = A;
|
||||
endmodule
|
||||
|
||||
//-----------------------------------------------------
|
||||
// Function : A minimum output pad
|
||||
//-----------------------------------------------------
|
||||
module GPOUT (
|
||||
inout Y, // External PAD signal
|
||||
input A // Data output
|
||||
);
|
||||
assign Y = A;
|
||||
endmodule
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
||||
# Configuration file for running experiments
|
||||
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
||||
# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs
|
||||
# Each job execute fpga_flow script on combination of architecture & benchmark
|
||||
# timeout_each_job is timeout for each job
|
||||
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
||||
|
||||
[GENERAL]
|
||||
run_engine=openfpga_shell
|
||||
power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml
|
||||
power_analysis = true
|
||||
spice_output=false
|
||||
verilog_output=true
|
||||
timeout_each_job = 20*60
|
||||
fpga_flow=vpr_blif
|
||||
|
||||
[OpenFPGA_SHELL]
|
||||
openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/OpenFPGAShellScripts/fix_device_example_script.openfpga
|
||||
openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N8_register_scan_chain_embedded_io_skywater130nm_fdhd_cc_openfpga.xml
|
||||
openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/auto_sim_openfpga.xml
|
||||
openfpga_vpr_device_layout=2x2
|
||||
|
||||
[ARCHITECTURES]
|
||||
arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N8_tileable_register_scan_chain_nonLR_embedded_io_skywater130nm.xml
|
||||
|
||||
[BENCHMARKS]
|
||||
bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.blif
|
||||
|
||||
[SYNTHESIS_PARAM]
|
||||
bench0_top = and2
|
||||
bench0_act = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.act
|
||||
bench0_verilog = ${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/and2/and2.v
|
||||
|
||||
[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH]
|
||||
end_flow_with_test=
|
||||
vpr_fpga_verilog_formal_verification_top_netlist=
|
|
@ -0,0 +1,646 @@
|
|||
<!--
|
||||
Low-cost homogeneous FPGA Architecture.
|
||||
|
||||
- Skywater 130 nm technology
|
||||
- General purpose logic block:
|
||||
K = 4, N = 8, fracturable 4 LUTs (can operate as one 4-LUT or two 3-LUTs with all 3 inputs shared)
|
||||
with optionally registered outputs
|
||||
- Routing architecture:
|
||||
- 10% L = 1, fc_in = 0.15, Fc_out = 0.10
|
||||
- 10% L = 2, fc_in = 0.15, Fc_out = 0.10
|
||||
- 80% L = 4, fc_in = 0.15, Fc_out = 0.10
|
||||
- 100 routing tracks per channel
|
||||
|
||||
Authors: Xifan Tang
|
||||
-->
|
||||
<architecture>
|
||||
<!--
|
||||
ODIN II specific config begins
|
||||
Describes the types of user-specified netlist blocks (in blif, this corresponds to
|
||||
".model [type_of_block]") that this architecture supports.
|
||||
|
||||
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
|
||||
already special structures in blif (.names, .input, .output, and .latch)
|
||||
that describe them.
|
||||
-->
|
||||
<models>
|
||||
<!-- A virtual model for I/O to be used in the physical mode of io block -->
|
||||
<model name="frac_lut4">
|
||||
<input_ports>
|
||||
<port name="in"/>
|
||||
</input_ports>
|
||||
<output_ports>
|
||||
<port name="lut3_out"/>
|
||||
<port name="lut4_out"/>
|
||||
</output_ports>
|
||||
</model>
|
||||
<!-- A virtual model for scan-chain flip-flop to be used in the physical mode of FF -->
|
||||
<model name="scff">
|
||||
<input_ports>
|
||||
<port name="D" clock="clk"/>
|
||||
<port name="DI" clock="clk"/>
|
||||
<port name="clk" is_clock="1"/>
|
||||
</input_ports>
|
||||
<output_ports>
|
||||
<port name="Q" clock="clk"/>
|
||||
</output_ports>
|
||||
</model>
|
||||
</models>
|
||||
<tiles>
|
||||
<!-- Do NOT add clock pins to I/O here!!! VPR does not build clock network in the way that OpenFPGA can support
|
||||
If you need to register the I/O, define clocks in the circuit models
|
||||
These clocks can be handled in back-end
|
||||
-->
|
||||
<tile name="gp_inpad" capacity="8" area="0">
|
||||
<equivalent_sites>
|
||||
<site pb_type="gp_inpad"/>
|
||||
</equivalent_sites>
|
||||
<output name="inpad" num_pins="1"/>
|
||||
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
|
||||
<pinlocations pattern="custom">
|
||||
<loc side="left">gp_inpad.inpad</loc>
|
||||
<loc side="top">gp_inpad.inpad</loc>
|
||||
<loc side="right">gp_inpad.inpad</loc>
|
||||
<loc side="bottom">gp_inpad.inpad</loc>
|
||||
</pinlocations>
|
||||
</tile>
|
||||
<tile name="gp_outpad" capacity="8" area="0">
|
||||
<equivalent_sites>
|
||||
<site pb_type="gp_outpad"/>
|
||||
</equivalent_sites>
|
||||
<input name="outpad" num_pins="1"/>
|
||||
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
|
||||
<pinlocations pattern="custom">
|
||||
<loc side="left">gp_outpad.outpad</loc>
|
||||
<loc side="top">gp_outpad.outpad</loc>
|
||||
<loc side="right">gp_outpad.outpad</loc>
|
||||
<loc side="bottom">gp_outpad.outpad</loc>
|
||||
</pinlocations>
|
||||
</tile>
|
||||
<tile name="clb" area="53894">
|
||||
<equivalent_sites>
|
||||
<site pb_type="clb"/>
|
||||
</equivalent_sites>
|
||||
<input name="I0" num_pins="3" equivalent="full"/>
|
||||
<input name="I0i" num_pins="1" equivalent="none"/>
|
||||
<input name="I1" num_pins="3" equivalent="full"/>
|
||||
<input name="I1i" num_pins="1" equivalent="none"/>
|
||||
<input name="I2" num_pins="3" equivalent="full"/>
|
||||
<input name="I2i" num_pins="1" equivalent="none"/>
|
||||
<input name="I3" num_pins="3" equivalent="full"/>
|
||||
<input name="I3i" num_pins="1" equivalent="none"/>
|
||||
<input name="I4" num_pins="3" equivalent="full"/>
|
||||
<input name="I4i" num_pins="1" equivalent="none"/>
|
||||
<input name="I5" num_pins="3" equivalent="full"/>
|
||||
<input name="I5i" num_pins="1" equivalent="none"/>
|
||||
<input name="I6" num_pins="3" equivalent="full"/>
|
||||
<input name="I6i" num_pins="1" equivalent="none"/>
|
||||
<input name="I7" num_pins="3" equivalent="full"/>
|
||||
<input name="I7i" num_pins="1" equivalent="none"/>
|
||||
<input name="regin" num_pins="1"/>
|
||||
<input name="scin" num_pins="1"/>
|
||||
<output name="O" num_pins="16" equivalent="none"/>
|
||||
<output name="regout" num_pins="1"/>
|
||||
<output name="scout" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
|
||||
<fc_override port_name="regin" fc_type="frac" fc_val="0"/>
|
||||
<fc_override port_name="regout" fc_type="frac" fc_val="0"/>
|
||||
<fc_override port_name="scin" fc_type="frac" fc_val="0"/>
|
||||
<fc_override port_name="scout" fc_type="frac" fc_val="0"/>
|
||||
</fc>
|
||||
<!--pinlocations pattern="spread"/-->
|
||||
<pinlocations pattern="custom">
|
||||
<loc side="left">clb.clk</loc>
|
||||
<loc side="top">clb.regin clb.scin</loc>
|
||||
<loc side="right">clb.O[7:0] clb.I0 clb.I0i clb.I1 clb.I1i clb.I2 clb.I2i clb.I3 clb.I3i </loc>
|
||||
<loc side="bottom">clb.regout clb.scout clb.O[15:8] clb.I4 clb.I4i clb.I5 clb.I5i clb.I6 clb.I6i clb.I7 clb.I7i</loc>
|
||||
</pinlocations>
|
||||
</tile>
|
||||
</tiles>
|
||||
<!-- ODIN II specific config ends -->
|
||||
<!-- Physical descriptions begin -->
|
||||
<layout tileable="true">
|
||||
<auto_layout aspect_ratio="1.0">
|
||||
<!-- On each side, general-purpose inpad and outpad are interleaved -->
|
||||
<!-- On top side, I/Os are organized as inpad, outpad, inpad, outpad, ... -->
|
||||
<region type="gp_inpad" priority="100" startx="1" endx="W-1" incrx="2" starty="H-1" endy="H-1"/>
|
||||
<region type="gp_outpad" priority="100" startx="2" endx="W-1" incrx="2" starty="H-1" endy="H-1"/>
|
||||
<!-- On right side, I/Os are organized as
|
||||
inpad
|
||||
outpad
|
||||
...
|
||||
inpad
|
||||
outpad
|
||||
This is to avoid unroutable conditions when FPGA size is too small (only gp_inpad is available)
|
||||
-->
|
||||
<region type="gp_outpad" priority="100" startx="W-1" endx="W-1" starty="1" endy="H-1" incry="2"/>
|
||||
<region type="gp_inpad" priority="100" startx="W-1" endx="W-1" starty="2" endy="H-1" incry="2"/>
|
||||
<!-- On top side, I/Os are organized as inpad, outpad, inpad, outpad, ... -->
|
||||
<region type="gp_inpad" priority="100" startx="1" endx="W-1" incrx="2" starty="0" endy="0"/>
|
||||
<region type="gp_outpad" priority="100" startx="2" endx="W-1" incrx="2" starty="0" endy="0"/>
|
||||
<corners type="EMPTY" priority="101"/>
|
||||
<!-- On left side, I/Os are organized as
|
||||
inpad
|
||||
outpad
|
||||
...
|
||||
inpad
|
||||
outpad
|
||||
This is to avoid unroutable conditions when FPGA size is too small (only gp_inpad is available)
|
||||
-->
|
||||
<region type="gp_outpad" priority="100" startx="0" endx="0" starty="1" endy="H-1" incry="2"/>
|
||||
<region type="gp_inpad" priority="100" startx="0" endx="0" starty="2" endy="H-1" incry="2"/>
|
||||
<!--Fill with 'clb'-->
|
||||
<fill type="clb" priority="10"/>
|
||||
</auto_layout>
|
||||
<fixed_layout name="2x2" width="4" height="4">
|
||||
<!-- On each side, general-purpose inpad and outpad are interleaved -->
|
||||
<!-- On top side, I/Os are organized as inpad, outpad, inpad, outpad, ... -->
|
||||
<region type="gp_inpad" priority="100" startx="1" endx="W-1" incrx="2" starty="H-1" endy="H-1"/>
|
||||
<region type="gp_outpad" priority="100" startx="2" endx="W-1" incrx="2" starty="H-1" endy="H-1"/>
|
||||
<!-- On right side, I/Os are organized as
|
||||
inpad
|
||||
outpad
|
||||
...
|
||||
inpad
|
||||
outpad
|
||||
This is to avoid unroutable conditions when FPGA size is too small (only gp_inpad is available)
|
||||
-->
|
||||
<region type="gp_outpad" priority="100" startx="W-1" endx="W-1" starty="1" endy="H-1" incry="2"/>
|
||||
<region type="gp_inpad" priority="100" startx="W-1" endx="W-1" starty="2" endy="H-1" incry="2"/>
|
||||
<!-- On top side, I/Os are organized as inpad, outpad, inpad, outpad, ... -->
|
||||
<region type="gp_inpad" priority="100" startx="1" endx="W-1" incrx="2" starty="0" endy="0"/>
|
||||
<region type="gp_outpad" priority="100" startx="2" endx="W-1" incrx="2" starty="0" endy="0"/>
|
||||
<corners type="EMPTY" priority="101"/>
|
||||
<!-- On left side, I/Os are organized as
|
||||
inpad
|
||||
outpad
|
||||
...
|
||||
inpad
|
||||
outpad
|
||||
This is to avoid unroutable conditions when FPGA size is too small (only gp_inpad is available)
|
||||
-->
|
||||
<region type="gp_outpad" priority="100" startx="0" endx="0" starty="1" endy="H-1" incry="2"/>
|
||||
<region type="gp_inpad" priority="100" startx="0" endx="0" starty="2" endy="H-1" incry="2"/>
|
||||
<corners type="EMPTY" priority="101"/>
|
||||
<!--Fill with 'clb'-->
|
||||
<fill type="clb" priority="10"/>
|
||||
</fixed_layout>
|
||||
</layout>
|
||||
<device>
|
||||
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
|
||||
models. We are modifying the delay values however, to include metal C and R, which allows more architecture
|
||||
experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
|
||||
(vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of
|
||||
45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping
|
||||
RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
|
||||
lined up with Stratix IV.
|
||||
We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
|
||||
Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
|
||||
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
|
||||
by 2.5x when looking up in Jeff's tables.
|
||||
The delay values are lined up with Stratix IV, which has an architecture similar to this
|
||||
proposed FPGA, and which is also 40 nm
|
||||
C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
|
||||
4x minimum drive strength buffer. -->
|
||||
<sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
|
||||
<!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
|
||||
area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
|
||||
-->
|
||||
<area grid_logic_tile_area="0"/>
|
||||
<chan_width_distr>
|
||||
<x distr="uniform" peak="1.000000"/>
|
||||
<y distr="uniform" peak="1.000000"/>
|
||||
</chan_width_distr>
|
||||
<switch_block type="wilton" fs="3" sub_type="subset" sub_fs="3"/>
|
||||
<connection_block input_switch_name="ipin_cblock"/>
|
||||
</device>
|
||||
<switchlist>
|
||||
<!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
|
||||
book area formula. This means the mux transistors are about 5x minimum drive strength.
|
||||
We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large
|
||||
mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
|
||||
the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
|
||||
by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified
|
||||
buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive.
|
||||
I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout
|
||||
(diff of second stage) listed below. Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
|
||||
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by
|
||||
2.5x when looking up in Jeff's tables.
|
||||
Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
|
||||
This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
|
||||
<switch type="mux" name="L1_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
|
||||
<switch type="mux" name="L2_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
|
||||
<switch type="mux" name="L4_mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
|
||||
<!--switch ipin_cblock resistance set to yeild for 4x minimum drive strength buffer-->
|
||||
<switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
|
||||
</switchlist>
|
||||
<segmentlist>
|
||||
<!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.
|
||||
With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 nm high (approximated as square) Stratix IV tile so this seems
|
||||
reasonable. Using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. -->
|
||||
<!-- GIVE a specific name for the segment! OpenFPGA appreciate that! -->
|
||||
<segment name="L1" freq="0.10" length="1" type="unidir" Rmetal="101" Cmetal="22.5e-15">
|
||||
<mux name="L1_mux"/>
|
||||
<sb type="pattern">1 1</sb>
|
||||
<cb type="pattern">1</cb>
|
||||
</segment>
|
||||
<segment name="L2" freq="0.10" length="2" type="unidir" Rmetal="101" Cmetal="22.5e-15">
|
||||
<mux name="L2_mux"/>
|
||||
<sb type="pattern">1 1 1</sb>
|
||||
<cb type="pattern">1 1</cb>
|
||||
</segment>
|
||||
<segment name="L4" freq="0.80" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
|
||||
<mux name="L4_mux"/>
|
||||
<sb type="pattern">1 1 1 1 1</sb>
|
||||
<cb type="pattern">1 1 1 1</cb>
|
||||
</segment>
|
||||
</segmentlist>
|
||||
<directlist>
|
||||
<direct name="shift_register" from_pin="clb.regout" to_pin="clb.regin" x_offset="0" y_offset="-1" z_offset="0"/>
|
||||
<direct name="scan_chain" from_pin="clb.scout" to_pin="clb.scin" x_offset="0" y_offset="-1" z_offset="0"/>
|
||||
</directlist>
|
||||
<complexblocklist>
|
||||
<!-- Different from GPIOs, embedded I/O interfaces can afford splitting
|
||||
input and output pin. Therefore, the input pad and output pad are defined
|
||||
as different blocks
|
||||
-->
|
||||
<!-- Define input pads begin -->
|
||||
<pb_type name="gp_inpad">
|
||||
<output name="inpad" num_pins="1"/>
|
||||
<pb_type name="inpad" blif_model=".input" num_pb="1">
|
||||
<output name="inpad" num_pins="1"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="inpad" input="inpad.inpad" output="gp_inpad.inpad">
|
||||
<delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="gp_inpad.inpad"/>
|
||||
</direct>
|
||||
</interconnect>
|
||||
<power method="ignore"/>
|
||||
</pb_type>
|
||||
<!-- Define input pads end -->
|
||||
<!-- Define output pads begin -->
|
||||
<pb_type name="gp_outpad">
|
||||
<input name="outpad" num_pins="1"/>
|
||||
<pb_type name="outpad" blif_model=".output" num_pb="1">
|
||||
<input name="outpad" num_pins="1"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="outpad" input="gp_outpad.outpad" output="outpad.outpad">
|
||||
<delay_constant max="1.394e-11" in_port="gp_outpad.outpad" out_port="outpad.outpad"/>
|
||||
</direct>
|
||||
</interconnect>
|
||||
<power method="ignore"/>
|
||||
</pb_type>
|
||||
<!-- Define output pads end -->
|
||||
<!-- Define general purpose logic block (CLB) begin -->
|
||||
<!-- -Due to the absence of local routing,
|
||||
the 4 inputs of fracturable LUT4 are no longer equivalent,
|
||||
because the 4th input can not be switched when the dual-LUT3 modes are used.
|
||||
So pin equivalence should be applied to the first 3 inputs only
|
||||
-->
|
||||
<pb_type name="clb">
|
||||
<input name="I0" num_pins="3" equivalent="full"/>
|
||||
<input name="I0i" num_pins="1" equivalent="none"/>
|
||||
<input name="I1" num_pins="3" equivalent="full"/>
|
||||
<input name="I1i" num_pins="1" equivalent="none"/>
|
||||
<input name="I2" num_pins="3" equivalent="full"/>
|
||||
<input name="I2i" num_pins="1" equivalent="none"/>
|
||||
<input name="I3" num_pins="3" equivalent="full"/>
|
||||
<input name="I3i" num_pins="1" equivalent="none"/>
|
||||
<input name="I4" num_pins="3" equivalent="full"/>
|
||||
<input name="I4i" num_pins="1" equivalent="none"/>
|
||||
<input name="I5" num_pins="3" equivalent="full"/>
|
||||
<input name="I5i" num_pins="1" equivalent="none"/>
|
||||
<input name="I6" num_pins="3" equivalent="full"/>
|
||||
<input name="I6i" num_pins="1" equivalent="none"/>
|
||||
<input name="I7" num_pins="3" equivalent="full"/>
|
||||
<input name="I7i" num_pins="1" equivalent="none"/>
|
||||
<input name="regin" num_pins="1"/>
|
||||
<input name="scin" num_pins="1"/>
|
||||
<output name="O" num_pins="16" equivalent="none"/>
|
||||
<output name="regout" num_pins="1"/>
|
||||
<output name="scout" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<!-- Describe fracturable logic element.
|
||||
Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs.
|
||||
The outputs of the fracturable logic element can be optionally registered
|
||||
-->
|
||||
<pb_type name="fle" num_pb="8">
|
||||
<input name="in" num_pins="4"/>
|
||||
<input name="regin" num_pins="1"/>
|
||||
<input name="scin" num_pins="1"/>
|
||||
<output name="out" num_pins="2"/>
|
||||
<output name="regout" num_pins="1"/>
|
||||
<output name="scout" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<!-- Physical mode definition begin (physical implementation of the fle) -->
|
||||
<mode name="physical" disabled_in_pack="true">
|
||||
<pb_type name="fabric" num_pb="1">
|
||||
<input name="in" num_pins="4"/>
|
||||
<input name="regin" num_pins="1"/>
|
||||
<input name="scin" num_pins="1"/>
|
||||
<output name="out" num_pins="2"/>
|
||||
<output name="regout" num_pins="1"/>
|
||||
<output name="scout" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<pb_type name="frac_logic" num_pb="1">
|
||||
<input name="in" num_pins="4"/>
|
||||
<output name="out" num_pins="2"/>
|
||||
<!-- Define LUT -->
|
||||
<pb_type name="frac_lut4" blif_model=".subckt frac_lut4" num_pb="1">
|
||||
<input name="in" num_pins="4"/>
|
||||
<output name="lut3_out" num_pins="2"/>
|
||||
<output name="lut4_out" num_pins="1"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="frac_logic.in" output="frac_lut4.in"/>
|
||||
<direct name="direct2" input="frac_lut4.lut3_out[1]" output="frac_logic.out[1]"/>
|
||||
<!-- Xifan Tang: I use out[0] because the output of lut6 in lut6 mode is wired to the out[0] -->
|
||||
<mux name="mux1" input="frac_lut4.lut4_out frac_lut4.lut3_out[0]" output="frac_logic.out[0]"/>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<!-- Define flip-flop with scan-chain capability, DI is the scan-chain data input -->
|
||||
<pb_type name="ff" blif_model=".subckt scff" num_pb="2">
|
||||
<input name="D" num_pins="1"/>
|
||||
<input name="DI" num_pins="1"/>
|
||||
<output name="Q" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<T_setup value="66e-12" port="ff.D" clock="clk"/>
|
||||
<T_setup value="66e-12" port="ff.DI" clock="clk"/>
|
||||
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="fabric.in" output="frac_logic.in"/>
|
||||
<direct name="direct2" input="fabric.scin" output="ff[0].DI"/>
|
||||
<direct name="direct3" input="ff[0].Q" output="ff[1].DI"/>
|
||||
<direct name="direct4" input="ff[1].Q" output="fabric.scout"/>
|
||||
<direct name="direct5" input="ff[1].Q" output="fabric.regout"/>
|
||||
<direct name="direct6" input="frac_logic.out[1:1]" output="ff[1:1].D"/>
|
||||
<complete name="complete1" input="fabric.clk" output="ff[1:0].clk"/>
|
||||
<mux name="mux1" input="frac_logic.out[0:0] fabric.regin" output="ff[0:0].D">
|
||||
<delay_constant max="25e-12" in_port="frac_logic.out[0:0]" out_port="ff[0:0].D"/>
|
||||
<delay_constant max="45e-12" in_port="fabric.regin" out_port="ff[0:0].D"/>
|
||||
</mux>
|
||||
<mux name="mux2" input="ff[0].Q frac_logic.out[0]" output="fabric.out[0]">
|
||||
<!-- LUT to output is faster than FF to output on a Stratix IV -->
|
||||
<delay_constant max="25e-12" in_port="frac_logic.out[0]" out_port="fabric.out[0]"/>
|
||||
<delay_constant max="45e-12" in_port="ff[0].Q" out_port="fabric.out[0]"/>
|
||||
</mux>
|
||||
<mux name="mux3" input="ff[1].Q frac_logic.out[1]" output="fabric.out[1]">
|
||||
<!-- LUT to output is faster than FF to output on a Stratix IV -->
|
||||
<delay_constant max="25e-12" in_port="frac_logic.out[1]" out_port="fabric.out[1]"/>
|
||||
<delay_constant max="45e-12" in_port="ff[1].Q" out_port="fabric.out[1]"/>
|
||||
</mux>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="fle.in" output="fabric.in"/>
|
||||
<direct name="direct3" input="fle.regin" output="fabric.regin"/>
|
||||
<direct name="direct4" input="fle.scin" output="fabric.scin"/>
|
||||
<direct name="direct5" input="fabric.out" output="fle.out"/>
|
||||
<direct name="direct7" input="fabric.regout" output="fle.regout"/>
|
||||
<direct name="direct8" input="fabric.scout" output="fle.scout"/>
|
||||
<direct name="direct9" input="fle.clk" output="fabric.clk"/>
|
||||
</interconnect>
|
||||
</mode>
|
||||
<!-- Physical mode definition end (physical implementation of the fle) -->
|
||||
<!-- Dual 3-LUT mode definition begin -->
|
||||
<mode name="n2_lut3">
|
||||
<pb_type name="lut3inter" num_pb="1">
|
||||
<input name="in" num_pins="3"/>
|
||||
<output name="out" num_pins="2"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<pb_type name="ble3" num_pb="2">
|
||||
<input name="in" num_pins="3"/>
|
||||
<output name="out" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<!-- Define the LUT -->
|
||||
<pb_type name="lut3" blif_model=".names" num_pb="1" class="lut">
|
||||
<input name="in" num_pins="3" port_class="lut_in"/>
|
||||
<output name="out" num_pins="1" port_class="lut_out"/>
|
||||
<!-- LUT timing using delay matrix -->
|
||||
<!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
|
||||
we instead take the average of these numbers to get more stable results
|
||||
82e-12
|
||||
173e-12
|
||||
261e-12
|
||||
263e-12
|
||||
398e-12
|
||||
-->
|
||||
<delay_matrix type="max" in_port="lut3.in" out_port="lut3.out">
|
||||
235e-12
|
||||
235e-12
|
||||
235e-12
|
||||
</delay_matrix>
|
||||
</pb_type>
|
||||
<!-- Define the flip-flop -->
|
||||
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
|
||||
<input name="D" num_pins="1" port_class="D"/>
|
||||
<output name="Q" num_pins="1" port_class="Q"/>
|
||||
<clock name="clk" num_pins="1" port_class="clock"/>
|
||||
<T_setup value="66e-12" port="ff.D" clock="clk"/>
|
||||
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="ble3.in[2:0]" output="lut3[0:0].in[2:0]"/>
|
||||
<direct name="direct2" input="lut3[0:0].out" output="ff[0:0].D">
|
||||
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
|
||||
<pack_pattern name="ble3" in_port="lut3[0:0].out" out_port="ff[0:0].D"/>
|
||||
</direct>
|
||||
<direct name="direct3" input="ble3.clk" output="ff[0:0].clk"/>
|
||||
<mux name="mux1" input="ff[0:0].Q lut3.out[0:0]" output="ble3.out[0:0]">
|
||||
<!-- LUT to output is faster than FF to output on a Stratix IV -->
|
||||
<delay_constant max="25e-12" in_port="lut3.out[0:0]" out_port="ble3.out[0:0]"/>
|
||||
<delay_constant max="45e-12" in_port="ff[0:0].Q" out_port="ble3.out[0:0]"/>
|
||||
</mux>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="lut3inter.in" output="ble3[0:0].in"/>
|
||||
<direct name="direct2" input="lut3inter.in" output="ble3[1:1].in"/>
|
||||
<direct name="direct3" input="ble3[1:0].out" output="lut3inter.out"/>
|
||||
<complete name="complete1" input="lut3inter.clk" output="ble3[1:0].clk"/>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="fle.in[2:0]" output="lut3inter.in"/>
|
||||
<direct name="direct2" input="lut3inter.out" output="fle.out"/>
|
||||
<direct name="direct3" input="fle.clk" output="lut3inter.clk"/>
|
||||
</interconnect>
|
||||
</mode>
|
||||
<!-- Dual 3-LUT mode definition end -->
|
||||
<!-- 4-LUT mode definition begin -->
|
||||
<mode name="n1_lut4">
|
||||
<!-- Define 4-LUT mode -->
|
||||
<pb_type name="ble4" num_pb="1">
|
||||
<input name="in" num_pins="4"/>
|
||||
<output name="out" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<!-- Define LUT -->
|
||||
<pb_type name="lut4" blif_model=".names" num_pb="1" class="lut">
|
||||
<input name="in" num_pins="4" port_class="lut_in"/>
|
||||
<output name="out" num_pins="1" port_class="lut_out"/>
|
||||
<!-- LUT timing using delay matrix -->
|
||||
<!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
|
||||
we instead take the average of these numbers to get more stable results
|
||||
82e-12
|
||||
173e-12
|
||||
261e-12
|
||||
263e-12
|
||||
398e-12
|
||||
397e-12
|
||||
-->
|
||||
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
|
||||
261e-12
|
||||
261e-12
|
||||
261e-12
|
||||
261e-12
|
||||
</delay_matrix>
|
||||
</pb_type>
|
||||
<!-- Define flip-flop -->
|
||||
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
|
||||
<input name="D" num_pins="1" port_class="D"/>
|
||||
<output name="Q" num_pins="1" port_class="Q"/>
|
||||
<clock name="clk" num_pins="1" port_class="clock"/>
|
||||
<T_setup value="66e-12" port="ff.D" clock="clk"/>
|
||||
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="ble4.in" output="lut4[0:0].in"/>
|
||||
<direct name="direct2" input="lut4.out" output="ff.D">
|
||||
<!-- Advanced user option that tells CAD tool to find LUT+FF pairs in netlist -->
|
||||
<pack_pattern name="ble4" in_port="lut4.out" out_port="ff.D"/>
|
||||
</direct>
|
||||
<direct name="direct3" input="ble4.clk" output="ff.clk"/>
|
||||
<mux name="mux1" input="ff.Q lut4.out" output="ble4.out">
|
||||
<!-- LUT to output is faster than FF to output on a Stratix IV -->
|
||||
<delay_constant max="25e-12" in_port="lut4.out" out_port="ble4.out"/>
|
||||
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble4.out"/>
|
||||
</mux>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="fle.in" output="ble4.in"/>
|
||||
<direct name="direct2" input="ble4.out" output="fle.out[0:0]"/>
|
||||
<direct name="direct3" input="fle.clk" output="ble4.clk"/>
|
||||
</interconnect>
|
||||
</mode>
|
||||
<!-- 4-LUT mode definition end -->
|
||||
<!-- Define shift register begin -->
|
||||
<mode name="shift_register">
|
||||
<pb_type name="shift_reg" num_pb="1">
|
||||
<input name="regin" num_pins="1"/>
|
||||
<output name="regout" num_pins="1"/>
|
||||
<clock name="clk" num_pins="1"/>
|
||||
<pb_type name="ff" blif_model=".latch" num_pb="2" class="flipflop">
|
||||
<input name="D" num_pins="1" port_class="D"/>
|
||||
<output name="Q" num_pins="1" port_class="Q"/>
|
||||
<clock name="clk" num_pins="1" port_class="clock"/>
|
||||
<T_setup value="66e-12" port="ff.D" clock="clk"/>
|
||||
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="shift_reg.regin" output="ff[0].D"/>
|
||||
<direct name="direct2" input="ff[0].Q" output="ff[1].D"/>
|
||||
<direct name="direct3" input="ff[1].Q" output="shift_reg.regout"/>
|
||||
<complete name="complete1" input="shift_reg.clk" output="ff.clk"/>
|
||||
</interconnect>
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<direct name="direct1" input="fle.regin" output="shift_reg.regin"/>
|
||||
<direct name="direct2" input="shift_reg.regout" output="fle.regout"/>
|
||||
<direct name="direct3" input="fle.clk" output="shift_reg.clk"/>
|
||||
</interconnect>
|
||||
</mode>
|
||||
<!-- Define shift register end -->
|
||||
</pb_type>
|
||||
<interconnect>
|
||||
<!-- We use direct connections to reduce the area to the most
|
||||
The global local routing is going to compensate the loss in routability
|
||||
-->
|
||||
<direct name="direct_fle0" input="clb.I0" output="fle[0:0].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle0i" input="clb.I0i" output="fle[0:0].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle1" input="clb.I1" output="fle[1:1].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle1i" input="clb.I1i" output="fle[1:1].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle2" input="clb.I2" output="fle[2:2].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle2i" input="clb.I2i" output="fle[2:2].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle3" input="clb.I3" output="fle[3:3].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle3i" input="clb.I3i" output="fle[3:3].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle4" input="clb.I4" output="fle[4:4].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle4i" input="clb.I4i" output="fle[4:4].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle5" input="clb.I5" output="fle[5:5].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle5i" input="clb.I5i" output="fle[5:5].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle6" input="clb.I6" output="fle[6:6].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle6i" input="clb.I6i" output="fle[6:6].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle7" input="clb.I7" output="fle[7:7].in[0:2]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<direct name="direct_fle7i" input="clb.I7i" output="fle[7:7].in[3]">
|
||||
<!-- TODO: Timing should be backannotated from post-PnR results -->
|
||||
</direct>
|
||||
<complete name="clks" input="clb.clk" output="fle[7:0].clk">
|
||||
</complete>
|
||||
<!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
|
||||
By grouping to output pins in this fashion, if a logic block is completely filled by 6-LUTs,
|
||||
then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
|
||||
naive specification).
|
||||
-->
|
||||
<direct name="clbouts1" input="fle[3:0].out[0:1]" output="clb.O[7:0]"/>
|
||||
<direct name="clbouts2" input="fle[7:4].out[0:1]" output="clb.O[15:8]"/>
|
||||
<!-- Shift register chain links -->
|
||||
<direct name="shift_register_in" input="clb.regin" output="fle[0:0].regin">
|
||||
<!-- Put all inter-block carry chain delay on this one edge -->
|
||||
<delay_constant max="0.16e-9" in_port="clb.regin" out_port="fle[0:0].regin"/>
|
||||
<!--pack_pattern name="chain" in_port="clb.regin" out_port="fle[0:0].regin"/-->
|
||||
</direct>
|
||||
<direct name="shift_register_out" input="fle[7:7].regout" output="clb.regout">
|
||||
<!--pack_pattern name="chain" in_port="fle[7:7].regout" out_port="clb.regout"/-->
|
||||
</direct>
|
||||
<direct name="shift_register_link" input="fle[6:0].regout" output="fle[7:1].regin">
|
||||
<!--pack_pattern name="chain" in_port="fle[6:0].regout" out_port="fle[7:1].regin"/-->
|
||||
</direct>
|
||||
<!-- Scan chain links -->
|
||||
<direct name="scan_chain_in" input="clb.scin" output="fle[0:0].scin">
|
||||
<!-- Put all inter-block carry chain delay on this one edge -->
|
||||
<delay_constant max="0.16e-9" in_port="clb.scin" out_port="fle[0:0].scin"/>
|
||||
</direct>
|
||||
<direct name="scan_chain_out" input="fle[7:7].scout" output="clb.scout">
|
||||
</direct>
|
||||
<direct name="scan_chain_link" input="fle[6:0].scout" output="fle[7:1].scin">
|
||||
</direct>
|
||||
</interconnect>
|
||||
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
|
||||
<!-- Place this general purpose logic block in any unspecified column -->
|
||||
</pb_type>
|
||||
<!-- Define general purpose logic block (CLB) ends -->
|
||||
</complexblocklist>
|
||||
</architecture>
|
Loading…
Reference in New Issue