add more flagship architectures

This commit is contained in:
tangxifan 2020-04-17 19:12:27 -06:00
parent a7d900088b
commit 2ea4b8a2a2
2 changed files with 1461 additions and 0 deletions

View File

@ -0,0 +1,680 @@
<!-- Homogeneous FPGA Architecture with Carry Chain for VPR8
- The chip layout is organized with a 32x32 array of Configurable Logic Blocks (CLBs)
surrounded by a ring of I/Os (34x34 grid in total, see the fixed layout below)
- [TODO] Delay numbers are extracted from a 12 nm technology
Author: Xifan Tang, Aurelien Alacchi and Ganesh Gore
-->
<architecture>
<!--
ODIN II specific config begins
Describes the types of user-specified netlist blocks (in blif, this corresponds to
".model [type_of_block]") that this architecture supports.
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
already special structures in blif (.names, .input, .output, and .latch)
that describe them.
-->
<models>
<!-- User-defined BLIF models. Each one is referenced through a blif_model=".subckt <name>"
attribute by a primitive pb_type in the complex block list of this file -->
<!-- 1-bit full adder: a, b and cin each reach both sumout and cout combinationally -->
<model name="adder">
<input_ports>
<port name="a" combinational_sink_ports="sumout cout"/>
<port name="b" combinational_sink_ports="sumout cout"/>
<port name="cin" combinational_sink_ports="sumout cout"/>
</input_ports>
<output_ports>
<port name="cout"/>
<port name="sumout"/>
</output_ports>
</model>
<!-- A virtual model for I/O to be used in the physical mode of io block -->
<model name="io">
<input_ports>
<port name="outpad"/>
</input_ports>
<output_ports>
<port name="inpad"/>
</output_ports>
</model>
<!-- A virtual model for the fracturable 6-input LUT to be used in the physical mode of the logic element (fle) -->
<model name="frac_lut6">
<input_ports>
<port name="in"/>
</input_ports>
<output_ports>
<port name="lut4_out"/>
<port name="lut5_out"/>
<port name="lut6_out"/>
</output_ports>
</model>
<!-- Flip-flop model used by the shift_register mode of the logic element; D and Q are both timed against clk -->
<model name="shift">
<input_ports>
<port name="D" clock="clk"/>
<port name="clk" is_clock="1"/>
</input_ports>
<output_ports>
<port name="Q" clock="clk"/>
</output_ports>
</model>
<!-- Flip-flop model with an extra D_chain data input, used by the physical mode of the logic element
where D_chain pins are wired into the scan chain (sc_in/sc_out) -->
<model name="scff">
<input_ports>
<port name="D" clock="clk"/>
<port name="D_chain" clock="clk"/>
<port name="clk" is_clock="1"/>
</input_ports>
<output_ports>
<port name="Q" clock="clk"/>
</output_ports>
</model>
</models>
<tiles>
<!-- Each I/O tile includes a GPIO -->
<!-- IOs go on the periphery of the FPGA, for consistency,
make it physically equivalent on all sides so that only one definition of I/Os is needed.
If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-->
<tile name="io" capacity="1" area="0">
<!-- The tile hosts the 'io' pb_type defined in the complex block list -->
<equivalent_sites>
<site pb_type="io"/>
</equivalent_sites>
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<!-- Each input of the tile can be driven by 15% of routing tracks
Each output of the tile can drive 10% of routing tracks
-->
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
<!-- Both pins are listed on all four sides, so this single tile definition
can be placed on any side of the I/O ring -->
<pinlocations pattern="custom">
<loc side="left">io.outpad io.inpad</loc>
<loc side="top">io.outpad io.inpad</loc>
<loc side="right">io.outpad io.inpad</loc>
<loc side="bottom">io.outpad io.inpad</loc>
</pinlocations>
</tile>
<!-- Each CLB tile includes a Configurable Logic Block (CLB)
Each input of the tile can be driven by 15% of routing tracks
Each output of the tile can drive 10% of routing tracks
-->
<tile name="clb" area="53894">
<!-- The tile hosts the 'clb' pb_type defined in the complex block list; the port list below mirrors that pb_type -->
<equivalent_sites>
<site pb_type="clb"/>
</equivalent_sites>
<input name="I0" num_pins="10" equivalent="full"/>
<input name="I1" num_pins="10" equivalent="full"/>
<input name="I2" num_pins="10" equivalent="full"/>
<input name="I3" num_pins="10" equivalent="full"/>
<input name="sc_in" num_pins="1"/>
<input name="cin" num_pins="1"/>
<input name="cin_trick" num_pins="1"/>
<input name="regin" num_pins="1"/>
<output name="O" num_pins="20" equivalent="none"/>
<output name="sc_out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="cout_copy" num_pins="1"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- By default, each input of the tile can be driven by 15% of routing tracks
and each output of the tile can drive 10% of routing tracks.
Four pins (cin, cout, sc_in, sc_out) have no connection to routing tracks (fc_val is 0).
They are directly wired from/to adjacent CLBs through the entries of the directlist
-->
<fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
<fc_override port_name="cin" fc_type="frac" fc_val="0"/>
<fc_override port_name="cout" fc_type="frac" fc_val="0"/>
<fc_override port_name="sc_in" fc_type="frac" fc_val="0"/>
<fc_override port_name="sc_out" fc_type="frac" fc_val="0"/>
</fc>
<!-- Highly recommended to customize pin locations when direct connections are used!!! -->
<!-- To ensure the best tileable routing architecture (minimize the number of unique SBs),
we keep the pins that touch the routing architecture on the right and bottom sides of the tile.
Top side pins are mainly for direct connections
-->
<pinlocations pattern="custom">
<loc side="left"></loc>
<loc side="top">clb.sc_in clb.cin clb.cin_trick clb.regin clb.clk</loc>
<loc side="right">clb.I0[9:0] clb.I1[9:0] clb.O[9:0]</loc>
<loc side="bottom">clb.cout clb.cout_copy clb.sc_out clb.regout clb.I2[9:0] clb.I3[9:0] clb.O[19:10]</loc>
</pinlocations>
</tile>
</tiles>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<!-- Apply tileable routing architecture.
This is strongly recommended if you want to PnR large FPGA fabric
-->
<layout tileable="true">
<!--auto_layout aspect_ratio="1.0"-->
<!-- Apply a fixed layout of 32x32 core array.
VPR8 considers the I/O ring in the array size
Therefore the height and width are both 34
-->
<fixed_layout name="32x32" width="34" height="34">
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
<!-- The corners rule carries the higher priority value (101 vs 100), so it is the one applied at the four corner locations -->
<perimeter type="io" priority="100"/>
<corners type="EMPTY" priority="101"/>
<!--Fill with 'clb'-->
<fill type="clb" priority="10"/>
</fixed_layout>
<!-- /auto_layout -->
</layout>
<device>
<!-- Resistance values of minimum-width nmos and pmos transistors -->
<sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
<!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-->
<area grid_logic_tile_area="0"/>
<!-- Uniform channel width distribution along both the x and y directions -->
<chan_width_distr>
<x distr="uniform" peak="1.000000"/>
<y distr="uniform" peak="1.000000"/>
</chan_width_distr>
<!-- Use Wilton-style connecting pattern in switch block
Each routing track has access to only three other routing tracks
(one per each side of the switch block except the side where the routing track locates)
-->
<switch_block type="wilton" fs="3"/>
<!-- Connection block inputs use the switch named 'ipin_cblock' defined in the switchlist -->
<connection_block input_switch_name="ipin_cblock"/>
</device>
<switchlist>
<!-- Switch '0' is the routing multiplexer referenced by the mux of segment 'L4' -->
<switch type="mux" name="0" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
<!-- Switch 'ipin_cblock' is referenced by the connection_block element in the device section -->
<switch type="mux" name="ipin_cblock" R="2231.5" Cout="0." Cin="1.47e-15" Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
</switchlist>
<segmentlist>
<!-- GIVE a specific name to the segment! OpenFPGA appreciates that! -->
<!-- Uni-directional routing architecture using only length-4 wires in routing channels -->
<segment name="L4" freq="1.000000" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15">
<!-- Wires of this segment are driven by the switch named '0' in the switchlist -->
<mux name="0"/>
<!-- All-ones patterns: no switch block or connection block position along the wire is depopulated
(see the VPR architecture reference for the exact pattern semantics) -->
<sb type="pattern">1 1 1 1 1</sb>
<cb type="pattern">1 1 1 1</cb>
</segment>
</segmentlist>
<directlist>
<!-- Both directs use offset (0, -1, 0): the output pin of a CLB is wired to the input pin of the CLB
in the same column, one row below. This matches the tile pin locations, where cout/sc_out sit on the
bottom side and cin/sc_in sit on the top side -->
<!-- Hard adder chain inside CLB is directly connected between adjacent CLBs -->
<direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
<!-- Scan chain inside CLB is directly connected between adjacent CLBs -->
<direct name="scff_chain" from_pin="clb.sc_out" to_pin="clb.sc_in" x_offset="0" y_offset="-1" z_offset="0"/>
</directlist>
<complexblocklist>
<!-- Define I/O pads begin -->
<!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
<!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
<pb_type name="io">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<!-- Do NOT add clock pins to I/O here!!! VPR does not build clock network in the way that OpenFPGA can support
If you need to register the I/O, define clocks in the circuit models
These clocks can be handled in back-end
-->
<!-- A mode denotes the physical implementation of an I/O
This mode will be not packable but is mainly used for fabric verilog generation
-->
<mode name="physical" packable="false">
<!-- Bidirectional pad primitive bound to the virtual 'io' model -->
<pb_type name="iopad" blif_model=".subckt io" num_pb="1">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="iopad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="iopad.outpad"/>
</direct>
<direct name="inpad" input="iopad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="iopad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<!-- IOs can operate as either inputs or outputs.
Delays below come from Ian Kuon. They are small, so they should be interpreted as
the delays to and from registers in the I/O (generally I/Os are registered
today and that is when you timing analyze them).
-->
<!-- Input mode: the pad is a primary input of the netlist (.input) -->
<mode name="inpad">
<pb_type name="inpad" blif_model=".input" num_pb="1">
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="inpad.inpad" output="io.inpad">
<delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<!-- Output mode: the pad is a primary output of the netlist (.output) -->
<mode name="outpad">
<pb_type name="outpad" blif_model=".output" num_pb="1">
<input name="outpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="outpad.outpad">
<delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
</direct>
</interconnect>
</mode>
<!-- Power estimation is disabled for the I/O block -->
<power method="ignore"/>
</pb_type>
<!-- Define I/O pads ends -->
<!-- Define multi-mode Configurable Logic Block (CLB) begin -->
<!-- Technical highlight:
K6_frac_N10_I40_chain_shiftreg_depop50
- K6_frac: Each Logic Element (LE) contains a fracturable 6 LUT,
which can operate as one 6-LUT or two 5-LUTs or four 4-LUTs
In addition to 6-LUT, each LE also includes two Flip-Flops
- N10: every CLB consists of 10 LEs and a local routing architecture
- I40: every CLB has 40 inputs
- chain: a hard adder chain across all the LEs in a CLB
The inputs of adder are driven by 4-LUTs.
The sumout of adder can optionally drive an LE output or a Flip-Flop
The carry-out of adder will drive the carry-in of the next adder in the chain
- shiftreg: Flip-flops inside CLB can be configured as shift registers.
The organization is similar to the hard adder chain except it is programmable
- depop50: every local routing multiplexer accesses to 50% of the CLB inputs
-->
<pb_type name="clb">
<input name="I0" num_pins="10" equivalent="full"/>
<input name="I1" num_pins="10" equivalent="full"/>
<input name="I2" num_pins="10" equivalent="full"/>
<input name="I3" num_pins="10" equivalent="full"/>
<input name="sc_in" num_pins="1"/>
<input name="cin" num_pins="1"/>
<input name="cin_trick" num_pins="1"/>
<input name="regin" num_pins="1"/>
<output name="O" num_pins="20" equivalent="none"/>
<output name="sc_out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="cout_copy" num_pins="1"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Describe fracturable logic element -->
<pb_type name="fle" num_pb="10">
<input name="in" num_pins="6"/>
<input name="cin" num_pins="1"/>
<input name="sc_in" num_pins="1"/>
<input name="regin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<output name="sc_out" num_pins="1"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Describe physical mode begins -->
<mode name="physical" packable="false">
<!-- Physical implementation of the logic element: one frac_logic block (fracturable LUT + 2 adders)
followed by two scan-chain flip-flops. The mode is marked as not packable -->
<pb_type name="frac_logic" num_pb="1">
<input name="in" num_pins="6"/>
<input name="cin" num_pins="1"/>
<input name="regin" num_pins="1"/>
<input name="regchain" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<pb_type name="frac_lut6" blif_model=".subckt frac_lut6" num_pb="1">
<input name="in" num_pins="6"/>
<output name="lut4_out" num_pins="4"/>
<output name="lut5_out" num_pins="2"/>
<output name="lut6_out" num_pins="1"/>
</pb_type>
<pb_type name="adder_phy" blif_model=".subckt adder" num_pb="2">
<input name="a" num_pins="1"/>
<input name="b" num_pins="1"/>
<input name="cin" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="sumout" num_pins="1"/>
<delay_constant max="0.3e-9" in_port="adder_phy.a" out_port="adder_phy.sumout"/>
<delay_constant max="0.3e-9" in_port="adder_phy.b" out_port="adder_phy.sumout"/>
<delay_constant max="0.3e-9" in_port="adder_phy.cin" out_port="adder_phy.sumout"/>
<delay_constant max="0.3e-9" in_port="adder_phy.a" out_port="adder_phy.cout"/>
<delay_constant max="0.3e-9" in_port="adder_phy.b" out_port="adder_phy.cout"/>
<delay_constant max="0.3e-9" in_port="adder_phy.cin" out_port="adder_phy.cout"/>
</pb_type>
<interconnect>
<direct name="direct_fraclut_in" input="frac_logic.in[5:0]" output="frac_lut6.in[5:0]"/>
<!-- Carry path: frac_logic.cin, adder_phy[0], adder_phy[1], frac_logic.cout -->
<direct name="direct_cin" input="frac_logic.cin" output="adder_phy[0].cin"/>
<direct name="direct_carry" input="adder_phy[0].cout" output="adder_phy[1].cin"/>
<direct name="direct_cout" input="adder_phy[1].cout" output="frac_logic.cout"/>
<!-- The four 4-LUT outputs are the a/b operands of the two adders -->
<direct name="direct_lut4carry0" input="frac_lut6.lut4_out[0]" output="adder_phy[0].a"/>
<direct name="direct_lut4carry1" input="frac_lut6.lut4_out[1]" output="adder_phy[0].b"/>
<direct name="direct_lut4carry2" input="frac_lut6.lut4_out[2]" output="adder_phy[1].a"/>
<direct name="direct_lut4carry3" input="frac_lut6.lut4_out[3]" output="adder_phy[1].b"/>
<!-- out[0] selects among adder sum, 5-LUT output and the shift-register input (regin) -->
<mux name="mux1" input="adder_phy[0].sumout frac_lut6.lut5_out[0] frac_logic.regin" output="frac_logic.out[0]">
</mux>
<!-- out[1] selects among adder sum, 5-LUT output, 6-LUT output and the output of the first flip-flop (regchain) -->
<mux name="mux2" input="adder_phy[1].sumout frac_lut6.lut5_out[1] frac_lut6.lut6_out[0] frac_logic.regchain[0]" output="frac_logic.out[1]">
</mux>
</interconnect>
</pb_type>
<pb_type name="ff_phy" blif_model=".subckt scff" num_pb="2">
<input name="D" num_pins="1"/>
<input name="D_chain" num_pins="1"/>
<output name="Q" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<T_setup value="66e-12" port="ff_phy.D" clock="clk"/>
<T_setup value="66e-12" port="ff_phy.D_chain" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff_phy.Q" clock="clk"/>
</pb_type>
<interconnect>
<complete name="direct_clk" input="fle.clk" output="ff_phy[1:0].clk"/>
<direct name="direct_in" input="fle.in[5:0]" output="frac_logic.in[5:0]"/>
<!-- Shift-register path: fle.regin enters frac_logic, ff_phy[0].Q is fed back as regchain,
and ff_phy[1].Q leaves through fle.regout -->
<direct name="direct_regin" input="fle.regin" output="frac_logic.regin"/>
<direct name="direct_regchain" input="ff_phy[0].Q" output="frac_logic.regchain"/>
<direct name="direct_regout" input="ff_phy[1].Q" output="fle.regout"/>
<direct name="direct_cin" input="fle.cin" output="frac_logic.cin"/>
<direct name="direct_cout" input="frac_logic.cout" output="fle.cout"/>
<direct name="direct_frac_out1" input="frac_logic.out[0]" output="ff_phy[0].D"/>
<direct name="direct_frac_out2" input="frac_logic.out[1]" output="ff_phy[1].D"/>
<!-- Scan chain through the D_chain pins: fle.sc_in, ff_phy[0], ff_phy[1], fle.sc_out -->
<direct name="direct_fle_scin" input="fle.sc_in" output="ff_phy[0].D_chain"/>
<direct name="direct_fle_sc_chain" input="ff_phy[0].Q" output="ff_phy[1].D_chain"/>
<direct name="direct_fle_scout" input="ff_phy[1].Q" output="fle.sc_out"/>
<!-- Each fle output selects between the registered and the combinational value -->
<mux name="mux1" input="ff_phy[0].Q frac_logic.out[0]" output="fle.out[0]">
</mux>
<mux name="mux2" input="ff_phy[1].Q frac_logic.out[1]" output="fle.out[1]">
</mux>
</interconnect>
</mode>
<!-- Describe physical mode ends -->
<!-- Define n2_lut5 mode begins -->
<mode name="n2_lut5" packable="true">
<!-- Operating mode: the logic element works as two 5-input BLEs (ble5) that share the same 5 inputs.
Each ble5 is either a LUT5 + flip-flop (mode blut5) or two LUT4s + adder + flip-flop (mode arithmetic) -->
<pb_type name="lut5inter" num_pb="1">
<input name="in" num_pins="5"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<pb_type name="ble5" num_pb="2">
<input name="in" num_pins="5"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<mode name="blut5">
<pb_type name="flut5" num_pb="1">
<input name="in" num_pins="5"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Regular LUT mode -->
<pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
<input name="in" num_pins="5" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix -->
<delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
202e-12
202e-12
202e-12
202e-12
202e-12
</delay_matrix>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="flut5.in" output="lut5.in"/>
<!-- Pack pattern 'ble5' marks the LUT-to-FF edge so that a LUT and the flip-flop it drives can be packed together -->
<direct name="direct2" input="lut5.out" output="ff.D">
<pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="flut5.clk" output="ff.clk"/>
<!-- Output selects between the registered and the combinational LUT value -->
<mux name="mux1" input="ff.Q lut5.out" output="flut5.out">
<delay_constant max="25e-12" in_port="lut5.out" out_port="flut5.out" />
<delay_constant max="45e-12" in_port="ff.Q" out_port="flut5.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="ble5.in" output="flut5.in"/>
<direct name="direct2" input="ble5.clk" output="flut5.clk"/>
<direct name="direct3" input="flut5.out" output="ble5.out"/>
</interconnect>
</mode>
<mode name="arithmetic">
<pb_type name="arithmetic" num_pb="1">
<input name="in" num_pins="4"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Special dual-LUT mode that drives adder only -->
<pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
<input name="in" num_pins="4" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix -->
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
180e-12
180e-12
180e-12
180e-12
</delay_matrix>
</pb_type>
<pb_type name="adder" blif_model=".subckt adder" num_pb="1">
<input name="a" num_pins="1"/>
<input name="b" num_pins="1"/>
<input name="cin" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="sumout" num_pins="1"/>
<delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.cin" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.cout"/>
<delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.cout"/>
<delay_constant max="0.3e-9" in_port="adder.cin" out_port="adder.cout"/>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="clock" input="arithmetic.clk" output="ff.clk"/>
<!-- Both 4-LUTs share the same four inputs; their outputs are the a and b operands of the adder -->
<direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
<direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
<direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a">
</direct>
<direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b">
</direct>
<!-- Pack pattern 'chain' is annotated on the adder edges here; the same name is used
on the carry links of the CLB-level interconnect -->
<direct name="add_to_ff" input="adder.sumout" output="ff.D">
<pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
</direct>
<direct name="carry_in" input="arithmetic.cin" output="adder.cin">
<pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
</direct>
<direct name="carry_out" input="adder.cout" output="arithmetic.cout">
<pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
</direct>
<!-- Output selects between the registered and the combinational sum -->
<mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
<delay_constant max="25e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
<delay_constant max="45e-12" in_port="ff.Q" out_port="arithmetic.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<!-- Only the lower four ble5 inputs are used in arithmetic mode -->
<direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
<direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
<!--pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/-->
</direct>
<direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
<!--pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/-->
</direct>
<direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
<direct name="direct3" input="arithmetic.out" output="ble5.out"/>
</interconnect>
</mode>
</pb_type>
<interconnect>
<!-- The two ble5 instances receive the same five inputs -->
<direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
<direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
<direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
<!-- Carry path inside the logic element: lut5inter.cin, ble5[0], ble5[1], lut5inter.cout -->
<direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
<!--pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/-->
</direct>
<direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
<!--pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/-->
</direct>
<direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin">
<!--pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cout"/-->
</direct>
<complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
</interconnect>
</pb_type>
<interconnect>
<!-- Only fle.in[4:0] is used in this mode; fle.in[5] is left unconnected -->
<direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
<direct name="direct2" input="lut5inter.out" output="fle.out"/>
<direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
<direct name="carry_in" input="fle.cin" output="lut5inter.cin">
<!--pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/-->
</direct>
<direct name="carry_out" input="lut5inter.cout" output="fle.cout">
<!--pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/-->
</direct>
</interconnect>
</mode>
<!-- Define n2_lut5 mode ends -->
<mode name="n1_lut6">
<!-- Operating mode: the logic element works as a single 6-input LUT followed by an optional flip-flop -->
<pb_type name="ble6" num_pb="1">
<input name="in" num_pins="6"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
<input name="in" num_pins="6" port_class="lut_in"/>
<output name="out" num_pins="1" port_class="lut_out"/>
<!-- LUT timing using delay matrix -->
<delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
229e-12
229e-12
229e-12
229e-12
229e-12
229e-12
</delay_matrix>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
<!-- Pack pattern 'ble6' marks the LUT-to-FF edge so that a LUT and the flip-flop it drives can be packed together -->
<direct name="direct2" input="lut6.out" output="ff.D">
<pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble6.clk" output="ff.clk"/>
<!-- Output selects between the registered and the combinational LUT value -->
<mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
<delay_constant max="25e-12" in_port="lut6.out" out_port="ble6.out" />
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble6.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in" output="ble6.in"/>
<!-- The single ble6 output uses fle.out[1], the same output that carries lut6_out in the physical mode;
fle.out[0] is not driven in this mode -->
<direct name="direct2" input="ble6.out" output="fle.out[1:1]"/>
<direct name="direct3" input="fle.clk" output="ble6.clk"/>
</interconnect>
</mode>
<!-- Define n1_lut6 mode ends -->
<!-- Define shift register mode begins -->
<mode name="shift_register">
<!-- Operating mode: the two flip-flops of the logic element form a 2-stage shift register.
Data enters from fle.regin, both stages are visible on fle.out, and the last stage also drives fle.regout -->
<pb_type name="ble_shift" num_pb="1">
<input name="in" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Flip-flops are bound to the user-defined 'shift' model instead of .latch -->
<pb_type name="ff" blif_model=".subckt shift" num_pb="2" class="flipflop">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble_shift.in" output="ff[0].D"/>
<!-- Stage 0 feeds stage 1 -->
<direct name="direct2" input="ff[0].Q" output="ff[1].D">
<!--pack_pattern name="ble_shift" in_port="ff[0].Q" out_port="ff[1].D"/-->
</direct>
<direct name="out1" input="ff[0].Q" output="ble_shift.out[0]"/>
<direct name="out2" input="ff[1].Q" output="ble_shift.out[1]"/>
<direct name="direct_regout" input="ff[1].Q" output="ble_shift.regout"/>
<complete name="direct3" input="ble_shift.clk" output="ff[1:0].clk"/>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.regin" output="ble_shift.in"/>
<direct name="direct2" input="ble_shift.out" output="fle.out"/>
<direct name="direct3" input="fle.clk" output="ble_shift.clk"/>
<direct name="direct4" input="ble_shift.regout" output="fle.regout"/>
</interconnect>
</mode>
<!-- Define shift_register mode end -->
</pb_type>
<interconnect>
<!-- Local routing of the CLB.
Each crossbar drives one input pin index of all 10 logic elements. It is fed by two of the
four CLB input groups (I0 to I3) plus the feedback from all logic element outputs
-->
<complete name="crossbar0" input="clb.I2 clb.I3 fle[9:0].out" output="fle[9:0].in[0]">
<delay_constant max="190e-12" in_port="clb.I2 clb.I3" out_port="fle[9:0].in[0]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[0]" />
</complete>
<complete name="crossbar1" input="clb.I1 clb.I2 fle[9:0].out" output="fle[9:0].in[1]">
<delay_constant max="190e-12" in_port="clb.I1 clb.I2" out_port="fle[9:0].in[1]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[1]" />
</complete>
<complete name="crossbar2" input="clb.I0 clb.I1 fle[9:0].out" output="fle[9:0].in[2]">
<delay_constant max="190e-12" in_port="clb.I0 clb.I1" out_port="fle[9:0].in[2]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[2]" />
</complete>
<complete name="crossbar3" input="clb.I1 clb.I3 fle[9:0].out" output="fle[9:0].in[3]">
<delay_constant max="190e-12" in_port="clb.I1 clb.I3" out_port="fle[9:0].in[3]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[3]" />
</complete>
<complete name="crossbar4" input="clb.I0 clb.I2 fle[9:0].out" output="fle[9:0].in[4]">
<delay_constant max="190e-12" in_port="clb.I0 clb.I2" out_port="fle[9:0].in[4]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[4]" />
</complete>
<complete name="crossbar5" input="clb.I0 clb.I3 fle[9:0].out" output="fle[9:0].in[5]">
<!-- Same delay as crossbar0 to crossbar4, so that paths through in[5] are not timed as zero-delay -->
<delay_constant max="190e-12" in_port="clb.I0 clb.I3" out_port="fle[9:0].in[5]" />
<delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[5]" />
</complete>
<complete name="clks" input="clb.clk" output="fle[9:0].clk">
</complete>
<!-- The carry-in of the first logic element can come from the dedicated carry pin (clb.cin),
from the routing fabric (clb.cin_trick) or from any logic element output.
Only the clb.cin edge is part of the 'chain' pack pattern.
The delay annotation of this edge is currently commented out
-->
<complete name="carry_in" input="clb.cin clb.cin_trick fle[9:0].out" output="fle[0:0].cin">
<!-- Put all inter-block carry chain delay on this one edge -->
<!--delay_constant max="0.15e-9" in_port="clb.cin clb.cin_trick" out_port="fle[0:0].cin"/-->
<pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
<!--pack_pattern name="chain" in_port="clb.cin_trick" out_port="fle[0:0].cin"/-->
</complete>
<!--direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
<pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
</direct-->
<!-- out[0] of the 10 logic elements drives O[9:0]; out[1] drives O[19:10] -->
<direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
<direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
<!-- Copy of the last carry-out, exposed on a separate CLB output pin -->
<direct name="cout_copy" input="fle[9:9].cout" output="clb.cout_copy"/>
<!-- Shift register links -->
<!-- NOTE(review): the shift register links below reuse the pack pattern name 'chain' of the carry chain.
Confirm that sharing one pattern name between the two chains is intended
-->
<direct name="regin" input="clb.regin" output="fle[0:0].regin">
<!-- Put all inter-block shift register chain delay on this one edge -->
<delay_constant max="0.15e-9" in_port="clb.regin" out_port="fle[0:0].regin"/>
<pack_pattern name="chain" in_port="clb.regin" out_port="fle[0:0].regin"/>
</direct>
<direct name="regout" input="fle[9:9].regout" output="clb.regout">
<pack_pattern name="chain" in_port="fle[9:9].regout" out_port="clb.regout"/>
</direct>
<direct name="reg_link" input="fle[8:0].regout" output="fle[9:1].regin">
<pack_pattern name="chain" in_port="fle[8:0].regout" out_port="fle[9:1].regin"/>
</direct>
<!-- Carry chain links -->
<direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
<pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
</direct>
<direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
<pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
</direct>
<!-- Scan chain links -->
<direct name="sc_in" input="clb.sc_in" output="fle[0:0].sc_in">
</direct>
<direct name="sc_out" input="fle[9:9].sc_out" output="clb.sc_out">
</direct>
<direct name="sc_link" input="fle[8:0].sc_out" output="fle[9:1].sc_in">
</direct>
</interconnect>
</pb_type>
<!-- Define general purpose logic block (CLB) ends -->
</complexblocklist>
</architecture>

View File

@ -0,0 +1,781 @@
<!-- Heterogeneous FPGA Architecture with Carry Chain and Dual-Port Memory for VPR8
- The chip layout is organized with a 32x32 core array of Configurable Logic Blocks (CLBs)
and columns of dual-port memory blocks, surrounded by a ring of I/Os
- [TODO] Delay numbers are extracted from a 12 nm technology
Author: Xifan Tang, Aurelien Alacchi and Ganesh Gore
-->
<architecture>
<!--
ODIN II specific config begins
Describes the types of user-specified netlist blocks (in blif, this corresponds to
".model [type_of_block]") that this architecture supports.
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
already special structures in blif (.names, .input, .output, and .latch)
that describe them.
-->
<models>
  <!-- Full adder: sumout and cout both depend combinationally on a, b and cin -->
  <model name="adder">
    <input_ports>
      <port name="a" combinational_sink_ports="sumout cout"/>
      <port name="b" combinational_sink_ports="sumout cout"/>
      <port name="cin" combinational_sink_ports="sumout cout"/>
    </input_ports>
    <output_ports>
      <port name="cout"/>
      <port name="sumout"/>
    </output_ports>
  </model>
  <!-- Virtual I/O model, instantiated by the physical mode of the io block -->
  <model name="io">
    <input_ports>
      <port name="outpad"/>
    </input_ports>
    <output_ports>
      <port name="inpad"/>
    </output_ports>
  </model>
  <!-- Virtual fracturable 6-LUT model, instantiated by the physical mode of the logic element -->
  <model name="frac_lut6">
    <input_ports>
      <port name="in"/>
    </input_ports>
    <output_ports>
      <port name="lut4_out"/>
      <port name="lut5_out"/>
      <port name="lut6_out"/>
    </output_ports>
  </model>
  <!-- Clocked register model, instantiated by the shift_register mode of the logic element -->
  <model name="shift">
    <input_ports>
      <port name="D" clock="clk"/>
      <port name="clk" is_clock="1"/>
    </input_ports>
    <output_ports>
      <port name="Q" clock="clk"/>
    </output_ports>
  </model>
  <!-- Clocked register model with an extra chain data input (D_chain),
       instantiated by the physical mode of the logic element -->
  <model name="scff">
    <input_ports>
      <port name="D" clock="clk"/>
      <port name="D_chain" clock="clk"/>
      <port name="clk" is_clock="1"/>
    </input_ports>
    <output_ports>
      <port name="Q" clock="clk"/>
    </output_ports>
  </model>
  <!-- Dual-port RAM with separate write and read address ports; every port is clocked by clk -->
  <model name="dual_port_ram">
    <input_ports>
      <!-- write address lines -->
      <port name="waddr" clock="clk"/>
      <!-- read address lines -->
      <port name="raddr" clock="clk"/>
      <!-- data lines; can be broken down into smaller bit widths, minimum size 1 -->
      <port name="d_in" clock="clk"/>
      <!-- write enable -->
      <port name="wen" clock="clk"/>
      <!-- read enable -->
      <port name="ren" clock="clk"/>
      <!-- memory clock -->
      <port name="clk" is_clock="1"/>
    </input_ports>
    <output_ports>
      <!-- output; can be broken down into smaller bit widths, minimum size 1 -->
      <port name="d_out" clock="clk"/>
    </output_ports>
  </model>
</models>
<tiles>
<!-- I/O tile: one GPIO per grid location (capacity="1") -->
<!-- I/Os are placed on the periphery of the FPGA. The tile exposes the same pins on
     all four sides so that a single definition serves every side; otherwise four
     different I/O tiles (one per side) would have to be defined.
-->
<tile name="io" capacity="1" area="0">
  <equivalent_sites>
    <site pb_type="io"/>
  </equivalent_sites>
  <input name="outpad" num_pins="1"/>
  <output name="inpad" num_pins="1"/>
  <!-- Fc: each input pin connects to 15% of the routing tracks,
       each output pin connects to 10% of the routing tracks
  -->
  <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
  <pinlocations pattern="custom">
    <loc side="left">io.outpad io.inpad</loc>
    <loc side="top">io.outpad io.inpad</loc>
    <loc side="right">io.outpad io.inpad</loc>
    <loc side="bottom">io.outpad io.inpad</loc>
  </pinlocations>
</tile>
<!-- CLB tile: hosts one Configurable Logic Block (CLB) -->
<tile name="clb" area="53894">
  <equivalent_sites>
    <site pb_type="clb"/>
  </equivalent_sites>
  <input name="I0" num_pins="10" equivalent="full"/>
  <input name="I1" num_pins="10" equivalent="full"/>
  <input name="I2" num_pins="10" equivalent="full"/>
  <input name="I3" num_pins="10" equivalent="full"/>
  <input name="sc_in" num_pins="1"/>
  <input name="cin" num_pins="1"/>
  <input name="cin_trick" num_pins="1"/>
  <input name="regin" num_pins="1"/>
  <output name="O" num_pins="20" equivalent="none"/>
  <output name="sc_out" num_pins="1"/>
  <output name="cout" num_pins="1"/>
  <output name="cout_copy" num_pins="1"/>
  <output name="regout" num_pins="1"/>
  <clock name="clk" num_pins="1"/>
  <!-- Fc: by default each input pin connects to 15% of the routing tracks
       and each output pin connects to 10% of the routing tracks.
       Four pins (cin, cout, sc_in, sc_out) have no connection to the routing
       tracks (Fc = 0); they are wired directly from/to the adjacent CLBs.
  -->
  <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
    <fc_override port_name="cin" fc_type="frac" fc_val="0"/>
    <fc_override port_name="cout" fc_type="frac" fc_val="0"/>
    <fc_override port_name="sc_in" fc_type="frac" fc_val="0"/>
    <fc_override port_name="sc_out" fc_type="frac" fc_val="0"/>
  </fc>
  <!-- Customizing pin locations is highly recommended when direct connections are used! -->
  <!-- To keep the routing architecture tileable (minimize the number of unique switch blocks),
       the general-purpose inputs and outputs are kept on the right and bottom sides of the tile.
       The top side carries the chain inputs and the clock; the chain outputs leave at the bottom.
  -->
  <pinlocations pattern="custom">
    <loc side="left"/>
    <loc side="top">clb.sc_in clb.cin clb.cin_trick clb.regin clb.clk</loc>
    <loc side="right">clb.I0[9:0] clb.I1[9:0] clb.O[9:0]</loc>
    <loc side="bottom">clb.cout clb.cout_copy clb.sc_out clb.regout clb.I2[9:0] clb.I3[9:0] clb.O[19:10]</loc>
  </pinlocations>
</tile>
<!-- Memory tile: hosts one 'memory' block and spans two grid rows (height="2") -->
<tile name="memory" height="2" area="548000">
  <equivalent_sites>
    <site pb_type="memory"/>
  </equivalent_sites>
  <input name="waddr" num_pins="10"/>
  <input name="raddr" num_pins="10"/>
  <input name="d_in" num_pins="32"/>
  <input name="wen" num_pins="1"/>
  <input name="ren" num_pins="1"/>
  <output name="d_out" num_pins="32"/>
  <clock name="clk" num_pins="1"/>
  <!-- Fc: each input pin connects to 15% of the routing tracks, each output pin to 10% -->
  <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
  <!-- Write-side pins and the lower data halves sit on the right;
       read-side pins and the upper data halves sit on the bottom -->
  <pinlocations pattern="custom">
    <loc side="left"/>
    <loc side="top">memory.clk</loc>
    <loc side="right">memory.waddr memory.d_in[15:0] memory.wen memory.d_out[15:0]</loc>
    <loc side="bottom">memory.raddr memory.d_in[31:16] memory.ren memory.d_out[31:16]</loc>
  </pinlocations>
</tile>
</tiles>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<!-- Apply tileable routing architecture.
This is strongly recommended if you want to PnR large FPGA fabric
-->
<layout tileable="true">
  <!--auto_layout aspect_ratio="1.0"-->
  <!-- Fixed layout with a 32x32 core array.
       VPR8 counts the I/O ring as part of the grid,
       therefore the width and height are both 34.
  -->
  <fixed_layout name="32x32" width="34" height="34">
    <!-- Ring of 'io' blocks on the perimeter, with 'EMPTY' blocks at the corners -->
    <perimeter type="io" priority="100"/>
    <corners type="EMPTY" priority="101"/>
    <!-- The core is filled with 'clb' unless a higher-priority rule applies -->
    <fill type="clb" priority="10"/>
    <!-- Columns of 'memory' starting at x=16 and repeating every 16 columns;
         the lower-priority 'EMPTY' column fills any location where a 'memory' does not fit.
         The vertical offset of 1 skips the perimeter row.
    -->
    <col type="memory" startx="16" starty="1" repeatx="16" priority="20"/>
    <col type="EMPTY" startx="16" starty="1" repeatx="16" priority="19"/>
  </fixed_layout>
  <!-- /auto_layout -->
</layout>
<device>
  <sizing R_minW_nmos="8926" R_minW_pmos="16067"/>
  <!-- grid_logic_tile_area applies to every block that does not set its own (non-routing) area.
       It is 0 here because all blocks in this architecture file set their area explicitly.
  -->
  <area grid_logic_tile_area="0"/>
  <!-- Uniform channel width in both directions -->
  <chan_width_distr>
    <x distr="uniform" peak="1.000000"/>
    <y distr="uniform" peak="1.000000"/>
  </chan_width_distr>
  <!-- Wilton-style switch block with Fs = 3:
       each routing track can reach three other routing tracks,
       one on each side of the switch block other than the side it arrives from
  -->
  <switch_block type="wilton" fs="3"/>
  <!-- Connection-block input multiplexers use the 'ipin_cblock' switch -->
  <connection_block input_switch_name="ipin_cblock"/>
</device>
<switchlist>
  <!-- Switch "0": multiplexer that drives the routing segments -->
  <switch name="0" type="mux" R="551" Cin=".77e-15" Cout="4e-15" Tdel="58e-12" mux_trans_size="2.630740" buf_size="27.645901"/>
  <!-- Switch "ipin_cblock": connection-block multiplexer in front of the block input pins -->
  <switch name="ipin_cblock" type="mux" R="2231.5" Cin="1.47e-15" Cout="0." Tdel="7.247000e-11" mux_trans_size="1.222260" buf_size="auto"/>
</switchlist>
<segmentlist>
  <!-- Always give the segment an explicit name; OpenFPGA relies on it -->
  <!-- Uni-directional routing using only length-4 wires in the routing channels.
       Switch-block and connection-block patterns are fully populated.
  -->
  <segment name="L4" type="unidir" length="4" freq="1.000000" Rmetal="101" Cmetal="22.5e-15">
    <mux name="0"/>
    <sb type="pattern">1 1 1 1 1</sb>
    <cb type="pattern">1 1 1 1</cb>
  </segment>
</segmentlist>
<directlist>
  <!-- Hard adder chain: cout of a CLB is wired directly to cin of the CLB one row below (y_offset = -1) -->
  <direct name="adder_carry" from_pin="clb.cout" to_pin="clb.cin" x_offset="0" y_offset="-1" z_offset="0"/>
  <!-- Scan chain: sc_out of a CLB is wired directly to sc_in of the CLB one row below (y_offset = -1) -->
  <direct name="scff_chain" from_pin="clb.sc_out" to_pin="clb.sc_in" x_offset="0" y_offset="-1" z_offset="0"/>
</directlist>
<complexblocklist>
<!-- Define I/O pads begin -->
<!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
<!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
<pb_type name="io">
  <input name="outpad" num_pins="1"/>
  <output name="inpad" num_pins="1"/>
  <!-- Do NOT add clock pins to the I/O here! VPR does not build the clock network
       in a way that OpenFPGA can support.
       If the I/O must be registered, define the clocks in the circuit models;
       they can then be handled in the back-end.
  -->
  <!-- Physical mode: the physical implementation of an I/O.
       It is not packable and is mainly used for fabric Verilog generation.
  -->
  <mode name="physical" packable="false">
    <pb_type name="iopad" blif_model=".subckt io" num_pb="1">
      <input name="outpad" num_pins="1"/>
      <output name="inpad" num_pins="1"/>
    </pb_type>
    <interconnect>
      <direct name="outpad" input="io.outpad" output="iopad.outpad">
        <delay_constant max="1.394e-11" in_port="io.outpad" out_port="iopad.outpad"/>
      </direct>
      <direct name="inpad" input="iopad.inpad" output="io.inpad">
        <delay_constant max="4.243e-11" in_port="iopad.inpad" out_port="io.inpad"/>
      </direct>
    </interconnect>
  </mode>
  <!-- Operating modes: an I/O works either as an input or as an output.
       The delays below come from Ian Kuon. They are small, so they should be read as
       the delays to and from registers in the I/O (I/Os are generally registered
       today, and that is the point at which they are timing analyzed).
  -->
  <mode name="inpad">
    <pb_type name="inpad" blif_model=".input" num_pb="1">
      <output name="inpad" num_pins="1"/>
    </pb_type>
    <interconnect>
      <direct name="inpad" input="inpad.inpad" output="io.inpad">
        <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
      </direct>
    </interconnect>
  </mode>
  <mode name="outpad">
    <pb_type name="outpad" blif_model=".output" num_pb="1">
      <input name="outpad" num_pins="1"/>
    </pb_type>
    <interconnect>
      <direct name="outpad" input="io.outpad" output="outpad.outpad">
        <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
      </direct>
    </interconnect>
  </mode>
  <power method="ignore"/>
</pb_type>
<!-- Define I/O pads ends -->
<!-- Define multi-mode Configurable Logic Block (CLB) begin -->
<!-- Technical highlight:
K6_frac_N10_I40_chain_shiftreg_depop50
- K6_frac: Each Logic Element (LE) contains a fracturable 6 LUT,
which can operate as one 6-LUT or two 5-LUTs or four 4-LUTs
In addition to 6-LUT, each LE also includes two Flip-Flops
- N10: every CLB consists of 10 LEs and a local routing architecture
- I40: every CLB has 40 inputs
- chain: a hard adder chain across all the LEs in a CLB
The inputs of adder are driven by 4-LUTs.
The sumout of the adder can optionally drive an LE output or a Flip-Flop
The carry-out of adder will drive the carry-in of the next adder in the chain
- shiftreg: Flip-flops inside CLB can be configured as shift registers.
The organization is similar to the hard adder chain except that it is programmable
- depop50: every local routing multiplexer accesses to 50% of the CLB inputs
-->
<pb_type name="clb">
<!-- CLB ports; names and widths mirror the pins declared on the 'clb' tile -->
<!-- General-purpose inputs: four groups of 10 pins (40 in total), fully equivalent within a group -->
<input name="I0" num_pins="10" equivalent="full"/>
<input name="I1" num_pins="10" equivalent="full"/>
<input name="I2" num_pins="10" equivalent="full"/>
<input name="I3" num_pins="10" equivalent="full"/>
<!-- Chain inputs: scan chain, carry chain (cin, plus the alternative cin_trick source) and shift register -->
<input name="sc_in" num_pins="1"/>
<input name="cin" num_pins="1"/>
<input name="cin_trick" num_pins="1"/>
<input name="regin" num_pins="1"/>
<!-- General-purpose outputs: two per logic element, not equivalent -->
<output name="O" num_pins="20" equivalent="none"/>
<!-- Chain outputs: scan chain, carry chain (cout, plus its duplicate cout_copy) and shift register -->
<output name="sc_out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="cout_copy" num_pins="1"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Describe fracturable logic element -->
<pb_type name="fle" num_pb="10">
<!-- Logic element ports: 6 LUT inputs and 2 outputs, plus one input/output pair
     for each of the carry chain (cin/cout), scan chain (sc_in/sc_out)
     and shift-register chain (regin/regout) -->
<input name="in" num_pins="6"/>
<input name="cin" num_pins="1"/>
<input name="sc_in" num_pins="1"/>
<input name="regin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<output name="sc_out" num_pins="1"/>
<output name="regout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Describe physical mode begins -->
<mode name="physical" packable="false">
  <!-- Combinational part: fracturable 6-LUT followed by a two-bit adder chain -->
  <pb_type name="frac_logic" num_pb="1">
    <input name="in" num_pins="6"/>
    <input name="cin" num_pins="1"/>
    <input name="regin" num_pins="1"/>
    <input name="regchain" num_pins="1"/>
    <output name="out" num_pins="2"/>
    <output name="cout" num_pins="1"/>
    <pb_type name="frac_lut6" blif_model=".subckt frac_lut6" num_pb="1">
      <input name="in" num_pins="6"/>
      <output name="lut4_out" num_pins="4"/>
      <output name="lut5_out" num_pins="2"/>
      <output name="lut6_out" num_pins="1"/>
    </pb_type>
    <pb_type name="adder_phy" blif_model=".subckt adder" num_pb="2">
      <input name="a" num_pins="1"/>
      <input name="b" num_pins="1"/>
      <input name="cin" num_pins="1"/>
      <output name="cout" num_pins="1"/>
      <output name="sumout" num_pins="1"/>
      <!-- Every input-to-output arc of the adder is annotated with 0.3 ns -->
      <delay_constant max="0.3e-9" in_port="adder_phy.a" out_port="adder_phy.sumout"/>
      <delay_constant max="0.3e-9" in_port="adder_phy.b" out_port="adder_phy.sumout"/>
      <delay_constant max="0.3e-9" in_port="adder_phy.cin" out_port="adder_phy.sumout"/>
      <delay_constant max="0.3e-9" in_port="adder_phy.a" out_port="adder_phy.cout"/>
      <delay_constant max="0.3e-9" in_port="adder_phy.b" out_port="adder_phy.cout"/>
      <delay_constant max="0.3e-9" in_port="adder_phy.cin" out_port="adder_phy.cout"/>
    </pb_type>
    <interconnect>
      <direct name="direct_fraclut_in" input="frac_logic.in[5:0]" output="frac_lut6.in[5:0]"/>
      <!-- Carry path: cin -> adder_phy[0] -> adder_phy[1] -> cout -->
      <direct name="direct_cin" input="frac_logic.cin" output="adder_phy[0].cin"/>
      <direct name="direct_carry" input="adder_phy[0].cout" output="adder_phy[1].cin"/>
      <direct name="direct_cout" input="adder_phy[1].cout" output="frac_logic.cout"/>
      <!-- The four 4-LUT outputs drive the a/b operands of the two adders -->
      <direct name="direct_lut4carry0" input="frac_lut6.lut4_out[0]" output="adder_phy[0].a"/>
      <direct name="direct_lut4carry1" input="frac_lut6.lut4_out[1]" output="adder_phy[0].b"/>
      <direct name="direct_lut4carry2" input="frac_lut6.lut4_out[2]" output="adder_phy[1].a"/>
      <direct name="direct_lut4carry3" input="frac_lut6.lut4_out[3]" output="adder_phy[1].b"/>
      <!-- Output selection: adder sum, LUT output or shift-register data -->
      <mux name="mux1" input="adder_phy[0].sumout frac_lut6.lut5_out[0] frac_logic.regin" output="frac_logic.out[0]"/>
      <mux name="mux2" input="adder_phy[1].sumout frac_lut6.lut5_out[1] frac_lut6.lut6_out[0] frac_logic.regchain[0]" output="frac_logic.out[1]"/>
    </interconnect>
  </pb_type>
  <!-- Sequential part: two flip-flops, each with a regular (D) and a chain (D_chain) data input -->
  <pb_type name="ff_phy" blif_model=".subckt scff" num_pb="2">
    <input name="D" num_pins="1"/>
    <input name="D_chain" num_pins="1"/>
    <output name="Q" num_pins="1"/>
    <clock name="clk" num_pins="1"/>
    <T_setup value="66e-12" port="ff_phy.D" clock="clk"/>
    <T_setup value="66e-12" port="ff_phy.D_chain" clock="clk"/>
    <T_clock_to_Q max="124e-12" port="ff_phy.Q" clock="clk"/>
  </pb_type>
  <interconnect>
    <complete name="direct_clk" input="fle.clk" output="ff_phy[1:0].clk"/>
    <direct name="direct_in" input="fle.in[5:0]" output="frac_logic.in[5:0]"/>
    <!-- Shift-register path: regin enters frac_logic, ff_phy[0].Q loops back as regchain,
         ff_phy[1].Q leaves as regout -->
    <direct name="direct_regin" input="fle.regin" output="frac_logic.regin"/>
    <direct name="direct_regchain" input="ff_phy[0].Q" output="frac_logic.regchain"/>
    <direct name="direct_regout" input="ff_phy[1].Q" output="fle.regout"/>
    <direct name="direct_cin" input="fle.cin" output="frac_logic.cin"/>
    <direct name="direct_cout" input="frac_logic.cout" output="fle.cout"/>
    <direct name="direct_frac_out1" input="frac_logic.out[0]" output="ff_phy[0].D"/>
    <direct name="direct_frac_out2" input="frac_logic.out[1]" output="ff_phy[1].D"/>
    <!-- Scan-chain path: sc_in -> ff_phy[0] -> ff_phy[1] -> sc_out, through the D_chain inputs -->
    <direct name="direct_fle_scin" input="fle.sc_in" output="ff_phy[0].D_chain"/>
    <direct name="direct_fle_sc_chain" input="ff_phy[0].Q" output="ff_phy[1].D_chain"/>
    <direct name="direct_fle_scout" input="ff_phy[1].Q" output="fle.sc_out"/>
    <!-- Each fle output is either registered or combinational -->
    <mux name="mux1" input="ff_phy[0].Q frac_logic.out[0]" output="fle.out[0]"/>
    <mux name="mux2" input="ff_phy[1].Q frac_logic.out[1]" output="fle.out[1]"/>
  </interconnect>
</mode>
<!-- Define physical mode ends -->
<!-- Define n2_lut5 mode begins -->
<mode name="n2_lut5" packable="true">
  <!-- Two 5-input basic logic elements sharing the same five inputs and one carry chain -->
  <pb_type name="lut5inter" num_pb="1">
    <input name="in" num_pins="5"/>
    <input name="cin" num_pins="1"/>
    <output name="out" num_pins="2"/>
    <output name="cout" num_pins="1"/>
    <clock name="clk" num_pins="1"/>
    <pb_type name="ble5" num_pb="2">
      <input name="in" num_pins="5"/>
      <input name="cin" num_pins="1"/>
      <output name="out" num_pins="1"/>
      <output name="cout" num_pins="1"/>
      <clock name="clk" num_pins="1"/>
      <!-- blut5: a 5-LUT followed by an optional flip-flop -->
      <mode name="blut5">
        <pb_type name="flut5" num_pb="1">
          <input name="in" num_pins="5"/>
          <output name="out" num_pins="1"/>
          <clock name="clk" num_pins="1"/>
          <!-- Regular LUT mode -->
          <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
            <input name="in" num_pins="5" port_class="lut_in"/>
            <output name="out" num_pins="1" port_class="lut_out"/>
            <!-- LUT timing: one delay entry per input pin -->
            <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
              202e-12
              202e-12
              202e-12
              202e-12
              202e-12
            </delay_matrix>
          </pb_type>
          <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
            <input name="D" num_pins="1" port_class="D"/>
            <output name="Q" num_pins="1" port_class="Q"/>
            <clock name="clk" num_pins="1" port_class="clock"/>
            <T_setup value="66e-12" port="ff.D" clock="clk"/>
            <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
          </pb_type>
          <interconnect>
            <direct name="direct1" input="flut5.in" output="lut5.in"/>
            <!-- Keep a LUT and the flip-flop it drives together during packing -->
            <direct name="direct2" input="lut5.out" output="ff.D">
              <pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
            </direct>
            <direct name="direct3" input="flut5.clk" output="ff.clk"/>
            <!-- Output is either registered or combinational -->
            <mux name="mux1" input="ff.Q lut5.out" output="flut5.out">
              <delay_constant max="25e-12" in_port="lut5.out" out_port="flut5.out"/>
              <delay_constant max="45e-12" in_port="ff.Q" out_port="flut5.out"/>
            </mux>
          </interconnect>
        </pb_type>
        <interconnect>
          <direct name="direct1" input="ble5.in" output="flut5.in"/>
          <direct name="direct2" input="ble5.clk" output="flut5.clk"/>
          <direct name="direct3" input="flut5.out" output="ble5.out"/>
        </interconnect>
      </mode>
      <!-- arithmetic: two 4-LUTs feed a one-bit adder, followed by an optional flip-flop -->
      <mode name="arithmetic">
        <pb_type name="arithmetic" num_pb="1">
          <input name="in" num_pins="4"/>
          <input name="cin" num_pins="1"/>
          <output name="out" num_pins="1"/>
          <output name="cout" num_pins="1"/>
          <clock name="clk" num_pins="1"/>
          <!-- Special dual-LUT mode that drives the adder only -->
          <pb_type name="lut4" blif_model=".names" num_pb="2" class="lut">
            <input name="in" num_pins="4" port_class="lut_in"/>
            <output name="out" num_pins="1" port_class="lut_out"/>
            <!-- LUT timing: one delay entry per input pin -->
            <delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
              180e-12
              180e-12
              180e-12
              180e-12
            </delay_matrix>
          </pb_type>
          <pb_type name="adder" blif_model=".subckt adder" num_pb="1">
            <input name="a" num_pins="1"/>
            <input name="b" num_pins="1"/>
            <input name="cin" num_pins="1"/>
            <output name="cout" num_pins="1"/>
            <output name="sumout" num_pins="1"/>
            <!-- Every input-to-output arc of the adder is annotated with 0.3 ns -->
            <delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.sumout"/>
            <delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.sumout"/>
            <delay_constant max="0.3e-9" in_port="adder.cin" out_port="adder.sumout"/>
            <delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.cout"/>
            <delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.cout"/>
            <delay_constant max="0.3e-9" in_port="adder.cin" out_port="adder.cout"/>
          </pb_type>
          <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
            <input name="D" num_pins="1" port_class="D"/>
            <output name="Q" num_pins="1" port_class="Q"/>
            <clock name="clk" num_pins="1" port_class="clock"/>
            <T_setup value="66e-12" port="ff.D" clock="clk"/>
            <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
          </pb_type>
          <interconnect>
            <direct name="clock" input="arithmetic.clk" output="ff.clk"/>
            <!-- Both 4-LUTs see the same four inputs -->
            <direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
            <direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
            <direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a"/>
            <direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b"/>
            <!-- The adder edges below belong to the "chain" pack pattern -->
            <direct name="add_to_ff" input="adder.sumout" output="ff.D">
              <pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
            </direct>
            <direct name="carry_in" input="arithmetic.cin" output="adder.cin">
              <pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
            </direct>
            <direct name="carry_out" input="adder.cout" output="arithmetic.cout">
              <pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
            </direct>
            <!-- Output is either the registered or the combinational sum -->
            <mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
              <delay_constant max="25e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
              <delay_constant max="45e-12" in_port="ff.Q" out_port="arithmetic.out"/>
            </mux>
          </interconnect>
        </pb_type>
        <interconnect>
          <!-- Only the four low inputs of ble5 are used in arithmetic mode -->
          <direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
          <direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
            <!--pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/-->
          </direct>
          <direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
            <!--pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/-->
          </direct>
          <direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
          <direct name="direct3" input="arithmetic.out" output="ble5.out"/>
        </interconnect>
      </mode>
    </pb_type>
    <interconnect>
      <!-- Both ble5 instances share the five inputs of lut5inter -->
      <direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
      <direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
      <direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
      <!-- Carry path: cin -> ble5[0] -> ble5[1] -> cout -->
      <direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
        <!--pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/-->
      </direct>
      <direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
        <!--pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/-->
      </direct>
      <direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin">
        <!--pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cin"/-->
      </direct>
      <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
    </interconnect>
  </pb_type>
  <interconnect>
    <!-- Only the five low inputs of the fle are used in this mode -->
    <direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
    <direct name="direct2" input="lut5inter.out" output="fle.out"/>
    <direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
    <direct name="carry_in" input="fle.cin" output="lut5inter.cin">
      <!--pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/-->
    </direct>
    <direct name="carry_out" input="lut5inter.cout" output="fle.cout">
      <!--pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/-->
    </direct>
  </interconnect>
</mode>
<!-- Define n2_lut5 mode ends -->
<mode name="n1_lut6">
  <!-- A single 6-LUT followed by an optional flip-flop -->
  <pb_type name="ble6" num_pb="1">
    <input name="in" num_pins="6"/>
    <output name="out" num_pins="1"/>
    <clock name="clk" num_pins="1"/>
    <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
      <input name="in" num_pins="6" port_class="lut_in"/>
      <output name="out" num_pins="1" port_class="lut_out"/>
      <!-- LUT timing: one delay entry per input pin -->
      <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
        229e-12
        229e-12
        229e-12
        229e-12
        229e-12
        229e-12
      </delay_matrix>
    </pb_type>
    <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
      <input name="D" num_pins="1" port_class="D"/>
      <output name="Q" num_pins="1" port_class="Q"/>
      <clock name="clk" num_pins="1" port_class="clock"/>
      <T_setup value="66e-12" port="ff.D" clock="clk"/>
      <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
    </pb_type>
    <interconnect>
      <direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
      <!-- Keep a LUT and the flip-flop it drives together during packing -->
      <direct name="direct2" input="lut6.out" output="ff.D">
        <pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
      </direct>
      <direct name="direct3" input="ble6.clk" output="ff.clk"/>
      <!-- Output is either registered or combinational -->
      <mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
        <delay_constant max="25e-12" in_port="lut6.out" out_port="ble6.out"/>
        <delay_constant max="45e-12" in_port="ff.Q" out_port="ble6.out"/>
      </mux>
    </interconnect>
  </pb_type>
  <interconnect>
    <direct name="direct1" input="fle.in" output="ble6.in"/>
    <!-- The 6-LUT result leaves through the second fle output -->
    <direct name="direct2" input="ble6.out" output="fle.out[1:1]"/>
    <direct name="direct3" input="fle.clk" output="ble6.clk"/>
  </interconnect>
</mode>
<!-- Define n1_lut6 mode ends -->
<!-- Define shift register mode begins -->
<mode name="shift_register">
  <!-- Two registers in series: in -> ff[0] -> ff[1] -> regout; both Q outputs are also visible on out -->
  <pb_type name="ble_shift" num_pb="1">
    <input name="in" num_pins="1"/>
    <output name="out" num_pins="2"/>
    <output name="regout" num_pins="1"/>
    <clock name="clk" num_pins="1"/>
    <pb_type name="ff" blif_model=".subckt shift" num_pb="2" class="flipflop">
      <input name="D" num_pins="1" port_class="D"/>
      <output name="Q" num_pins="1" port_class="Q"/>
      <clock name="clk" num_pins="1" port_class="clock"/>
      <T_setup value="66e-12" port="ff.D" clock="clk"/>
      <T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
    </pb_type>
    <interconnect>
      <direct name="direct1" input="ble_shift.in" output="ff[0].D"/>
      <direct name="direct2" input="ff[0].Q" output="ff[1].D">
        <!--pack_pattern name="ble_shift" in_port="ff[0].Q" out_port="ff[1].D"/-->
      </direct>
      <direct name="out1" input="ff[0].Q" output="ble_shift.out[0]"/>
      <direct name="out2" input="ff[1].Q" output="ble_shift.out[1]"/>
      <direct name="direct_regout" input="ff[1].Q" output="ble_shift.regout"/>
      <complete name="direct3" input="ble_shift.clk" output="ff[1:0].clk"/>
    </interconnect>
  </pb_type>
  <interconnect>
    <!-- The shift data enters through fle.regin and leaves through fle.regout -->
    <direct name="direct1" input="fle.regin" output="ble_shift.in"/>
    <direct name="direct2" input="ble_shift.out" output="fle.out"/>
    <direct name="direct3" input="fle.clk" output="ble_shift.clk"/>
    <direct name="direct4" input="ble_shift.regout" output="fle.regout"/>
  </interconnect>
</mode>
<!-- Define shift_register mode end -->
</pb_type>
<interconnect>
  <!-- Depopulated local routing: each of the six fle input positions is reachable from
       two of the four CLB input groups plus the outputs of all fles (feedback).
       Every crossbar path is annotated with the same 190 ps delay.
  -->
  <complete name="crossbar0" input="clb.I2 clb.I3 fle[9:0].out" output="fle[9:0].in[0]">
    <delay_constant max="190e-12" in_port="clb.I2 clb.I3" out_port="fle[9:0].in[0]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[0]"/>
  </complete>
  <complete name="crossbar1" input="clb.I1 clb.I2 fle[9:0].out" output="fle[9:0].in[1]">
    <delay_constant max="190e-12" in_port="clb.I1 clb.I2" out_port="fle[9:0].in[1]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[1]"/>
  </complete>
  <complete name="crossbar2" input="clb.I0 clb.I1 fle[9:0].out" output="fle[9:0].in[2]">
    <delay_constant max="190e-12" in_port="clb.I0 clb.I1" out_port="fle[9:0].in[2]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[2]"/>
  </complete>
  <complete name="crossbar3" input="clb.I1 clb.I3 fle[9:0].out" output="fle[9:0].in[3]">
    <delay_constant max="190e-12" in_port="clb.I1 clb.I3" out_port="fle[9:0].in[3]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[3]"/>
  </complete>
  <complete name="crossbar4" input="clb.I0 clb.I2 fle[9:0].out" output="fle[9:0].in[4]">
    <delay_constant max="190e-12" in_port="clb.I0 clb.I2" out_port="fle[9:0].in[4]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[4]"/>
  </complete>
  <complete name="crossbar5" input="clb.I0 clb.I3 fle[9:0].out" output="fle[9:0].in[5]">
    <!-- Same delay as crossbar0-4; without it, paths into in[5] would be timed as zero-delay -->
    <delay_constant max="190e-12" in_port="clb.I0 clb.I3" out_port="fle[9:0].in[5]"/>
    <delay_constant max="190e-12" in_port="fle[9:0].out" out_port="fle[9:0].in[5]"/>
  </complete>
  <!-- The single CLB clock is distributed to every fle -->
  <complete name="clks" input="clb.clk" output="fle[9:0].clk"/>
  <!-- Carry-in of the first fle can come from the dedicated cin pin, from cin_trick,
       or from any fle output. Only the clb.cin edge is part of the "chain" pack pattern.
  -->
  <complete name="carry_in" input="clb.cin clb.cin_trick fle[9:0].out" output="fle[0:0].cin">
    <!-- Put all inter-block carry chain delay on this one edge (annotation currently disabled) -->
    <!--delay_constant max="0.15e-9" in_port="clb.cin clb.cin_trick" out_port="fle[0:0].cin"/-->
    <pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
    <!--pack_pattern name="chain" in_port="clb.cin_trick" out_port="fle[0:0].cin"/-->
  </complete>
  <!--direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
  <pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
  </direct-->
  <!-- fle outputs drive the CLB outputs one-to-one: out[0] -> O[9:0], out[1] -> O[19:10] -->
  <direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
  <direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
  <!-- The carry-out of the last fle is also exported on a second CLB pin -->
  <direct name="cout_copy" input="fle[9:9].cout" output="clb.cout_copy"/>
  <!-- Shift register links: clb.regin -> fle[0] -> ... -> fle[9] -> clb.regout -->
  <!-- NOTE(review): these links reuse the pack pattern name "chain" of the carry chain;
       confirm that sharing one pattern name between the two chains is intended -->
  <direct name="regin" input="clb.regin" output="fle[0:0].regin">
    <!-- Put all inter-block shift-register delay on this one edge -->
    <delay_constant max="0.15e-9" in_port="clb.regin" out_port="fle[0:0].regin"/>
    <pack_pattern name="chain" in_port="clb.regin" out_port="fle[0:0].regin"/>
  </direct>
  <direct name="regout" input="fle[9:9].regout" output="clb.regout">
    <pack_pattern name="chain" in_port="fle[9:9].regout" out_port="clb.regout"/>
  </direct>
  <direct name="reg_link" input="fle[8:0].regout" output="fle[9:1].regin">
    <pack_pattern name="chain" in_port="fle[8:0].regout" out_port="fle[9:1].regin"/>
  </direct>
  <!-- Carry chain links: each fle carry-out feeds the next fle carry-in; the last one leaves the CLB -->
  <direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
    <pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
  </direct>
  <direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
    <pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
  </direct>
  <!-- Scan chain links: clb.sc_in -> fle[0] -> ... -> fle[9] -> clb.sc_out -->
  <direct name="sc_in" input="clb.sc_in" output="fle[0:0].sc_in"/>
  <direct name="sc_out" input="fle[9:9].sc_out" output="clb.sc_out"/>
  <direct name="sc_link" input="fle[8:0].sc_out" output="fle[9:1].sc_in"/>
</interconnect>
</pb_type>
<!-- Define general purpose logic block (CLB) ends -->
<!-- Define single-mode dual-port memory begin -->
<pb_type name="memory">
  <input name="waddr" num_pins="10"/>
  <input name="raddr" num_pins="10"/>
  <input name="d_in" num_pins="32"/>
  <input name="wen" num_pins="1"/>
  <input name="ren" num_pins="1"/>
  <output name="d_out" num_pins="32"/>
  <clock name="clk" num_pins="1"/>
  <!-- The 512x32 = 16Kbit memory block.
       Note: the delay numbers are taken from the VPR flagship XML without modification;
       they should be aligned with the process technology used to build the 16K dual-port RAM.
       NOTE(review): 512 words need only 9 address bits, while waddr/raddr are 10 bits wide;
       confirm the intended memory depth.
  -->
  <mode name="mem_512x32_dp">
    <pb_type name="mem_512x32_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
      <input name="waddr" num_pins="10" port_class="address"/>
      <input name="raddr" num_pins="10" port_class="address"/>
      <input name="d_in" num_pins="32" port_class="data_in"/>
      <input name="wen" num_pins="1" port_class="write_en"/>
      <input name="ren" num_pins="1" port_class="write_en"/>
      <output name="d_out" num_pins="32" port_class="data_out"/>
      <clock name="clk" num_pins="1" port_class="clock"/>
      <!-- All inputs share one setup time; the data output has one clock-to-Q delay -->
      <T_setup value="509e-12" port="mem_512x32_dp.waddr" clock="clk"/>
      <T_setup value="509e-12" port="mem_512x32_dp.raddr" clock="clk"/>
      <T_setup value="509e-12" port="mem_512x32_dp.d_in" clock="clk"/>
      <T_setup value="509e-12" port="mem_512x32_dp.wen" clock="clk"/>
      <T_setup value="509e-12" port="mem_512x32_dp.ren" clock="clk"/>
      <T_clock_to_Q max="1.234e-9" port="mem_512x32_dp.d_out" clock="clk"/>
      <power method="pin-toggle">
        <port name="clk" energy_per_toggle="17.9e-12"/>
        <static_power power_per_instance="0.0"/>
      </power>
    </pb_type>
    <interconnect>
      <!-- Block pins map one-to-one onto the primitive: 132 ps on every input path, 40 ps on the output path -->
      <direct name="waddress" input="memory.waddr" output="mem_512x32_dp.waddr">
        <delay_constant max="132e-12" in_port="memory.waddr" out_port="mem_512x32_dp.waddr"/>
      </direct>
      <direct name="raddress" input="memory.raddr" output="mem_512x32_dp.raddr">
        <delay_constant max="132e-12" in_port="memory.raddr" out_port="mem_512x32_dp.raddr"/>
      </direct>
      <direct name="data_input" input="memory.d_in" output="mem_512x32_dp.d_in">
        <delay_constant max="132e-12" in_port="memory.d_in" out_port="mem_512x32_dp.d_in"/>
      </direct>
      <direct name="writeen" input="memory.wen" output="mem_512x32_dp.wen">
        <delay_constant max="132e-12" in_port="memory.wen" out_port="mem_512x32_dp.wen"/>
      </direct>
      <direct name="readen" input="memory.ren" output="mem_512x32_dp.ren">
        <delay_constant max="132e-12" in_port="memory.ren" out_port="mem_512x32_dp.ren"/>
      </direct>
      <direct name="dataout" input="mem_512x32_dp.d_out" output="memory.d_out">
        <delay_constant max="40e-12" in_port="mem_512x32_dp.d_out" out_port="memory.d_out"/>
      </direct>
      <!-- The clock connection carries no delay annotation -->
      <direct name="clk" input="memory.clk" output="mem_512x32_dp.clk"/>
    </interconnect>
  </mode>
</pb_type>
<!-- Define single-mode dual-port memory end -->
</complexblocklist>
</architecture>